merging with branch beta

Miriam Baglioni 2021-08-11 15:55:34 +02:00
commit 8da3a25cf6
309 changed files with 1979 additions and 2590 deletions

@@ -8,8 +8,6 @@ import java.util.List;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.maven.plugin.AbstractMojo;
-import org.apache.maven.plugin.MojoExecutionException;
-import org.apache.maven.plugin.MojoFailureException;
 
 /**
  * Generates oozie properties which were not provided from commandline.
@@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
    };
 
    @Override
-   public void execute() throws MojoExecutionException, MojoFailureException {
+   public void execute() {
        if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
            && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
            String generatedSandboxName = generateSandboxName(
@@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
    /**
     * Generates sandbox name from workflow source directory.
     *
-    * @param wfSourceDir
+    * @param wfSourceDir workflow source directory
     * @return generated sandbox name
     */
    private String generateSandboxName(String wfSourceDir) {
        // utilize all dir names until finding one of the limiters
-       List<String> sandboxNameParts = new ArrayList<String>();
+       List<String> sandboxNameParts = new ArrayList<>();
        String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
        ArrayUtils.reverse(tokens);
        if (tokens.length > 0) {
            for (String token : tokens) {
                for (String limiter : limiters) {
                    if (limiter.equals(token)) {
-                       return sandboxNameParts.size() > 0
+                       return !sandboxNameParts.isEmpty()
                            ? StringUtils.join(sandboxNameParts.toArray())
                            : null;
                    }
                }
-               if (sandboxNameParts.size() > 0) {
+               if (!sandboxNameParts.isEmpty()) {
                    sandboxNameParts.add(0, File.separator);
                }
                sandboxNameParts.add(0, token);
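
A minimal, runnable sketch of the sandbox-name derivation exercised above (assumes a Unix path separator). The limiters array is not visible in this hunk, so the {"dhp"} value below is an assumption inferred from the tests later in this commit:

    import java.io.File;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.commons.lang.ArrayUtils;
    import org.apache.commons.lang.StringUtils;

    public class SandboxNameSketch {

        private static final String[] limiters = { "dhp" }; // assumption, not the plugin's actual constant

        // walk directory tokens right to left, joining them until a limiter is met
        static String generateSandboxName(String wfSourceDir) {
            List<String> parts = new ArrayList<>();
            String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
            ArrayUtils.reverse(tokens);
            for (String token : tokens) {
                for (String limiter : limiters) {
                    if (limiter.equals(token)) {
                        // stop at the limiter; null when nothing was collected yet
                        return !parts.isEmpty() ? StringUtils.join(parts.toArray()) : null;
                    }
                }
                if (!parts.isEmpty()) {
                    parts.add(0, File.separator);
                }
                parts.add(0, token);
            }
            return null; // assumption: no limiter found
        }

        public static void main(String[] args) {
            System.out.println(generateSandboxName("eu/dnetlib/dhp/wf/transformers")); // wf/transformers
            System.out.println(generateSandboxName("eu/dnetlib/dhp/"));                // null
        }
    }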

@@ -16,6 +16,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
     */
    protected List<String> getEscapeChars(String escapeChars) {
        List<String> tokens = getListFromCSV(escapeChars);
-       List<String> realTokens = new ArrayList<String>();
+       List<String> realTokens = new ArrayList<>();
        for (String token : tokens) {
            String realToken = getRealToken(token);
            realTokens.add(realToken);
@@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
     * @return content
     */
    protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
-       List<String> names = new ArrayList<String>(properties.stringPropertyNames());
+       List<String> names = new ArrayList<>(properties.stringPropertyNames());
        Collections.sort(names);
        StringBuilder sb = new StringBuilder();
        if (!StringUtils.isBlank(comment)) {
@@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
        throws MojoExecutionException {
        try {
            String content = getContent(comment, properties, escapeTokens);
-           FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
+           FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new MojoExecutionException("Error creating properties file", e);
        }
@@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
     */
    protected static final List<String> getListFromCSV(String csv) {
        if (StringUtils.isBlank(csv)) {
-           return new ArrayList<String>();
+           return new ArrayList<>();
        }
-       List<String> list = new ArrayList<String>();
+       List<String> list = new ArrayList<>();
        String[] tokens = StringUtils.split(csv, ",");
        for (String token : tokens) {
            list.add(token.trim());

@@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 /** @author mhorst, claudio.atzori */
-public class GenerateOoziePropertiesMojoTest {
+class GenerateOoziePropertiesMojoTest {
 
    private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
 
    @BeforeEach
-   public void clearSystemProperties() {
+   void clearSystemProperties() {
        System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
        System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
    }
 
    @Test
-   public void testExecuteEmpty() throws Exception {
+   void testExecuteEmpty() throws Exception {
        // execute
        mojo.execute();
 
@@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
    }
 
    @Test
-   public void testExecuteSandboxNameAlreadySet() throws Exception {
+   void testExecuteSandboxNameAlreadySet() throws Exception {
        // given
        String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
        String sandboxName = "originalSandboxName";
@@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
    }
 
    @Test
-   public void testExecuteEmptyWorkflowSourceDir() throws Exception {
+   void testExecuteEmptyWorkflowSourceDir() throws Exception {
        // given
        String workflowSourceDir = "";
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
    }
 
    @Test
-   public void testExecuteNullSandboxNameGenerated() throws Exception {
+   void testExecuteNullSandboxNameGenerated() throws Exception {
        // given
        String workflowSourceDir = "eu/dnetlib/dhp/";
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
    }
 
    @Test
-   public void testExecute() throws Exception {
+   void testExecute() throws Exception {
        // given
        String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
    }
 
    @Test
-   public void testExecuteWithoutRoot() throws Exception {
+   void testExecuteWithoutRoot() throws Exception {
        // given
        String workflowSourceDir = "wf/transformers";
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

@@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
 
 /** @author mhorst, claudio.atzori */
 @ExtendWith(MockitoExtension.class)
-public class WritePredefinedProjectPropertiesTest {
+class WritePredefinedProjectPropertiesTest {
 
    @Mock
    private MavenProject mavenProject;
@@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
    // ----------------------------------- TESTS ---------------------------------------------
 
    @Test
-   public void testExecuteEmpty() throws Exception {
+   void testExecuteEmpty() throws Exception {
        // execute
        mojo.execute();
 
@@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithProjectProperties() throws Exception {
+   void testExecuteWithProjectProperties() throws Exception {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test()
-   public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
+   void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
+   void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
+   void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
+   void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
+   void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
        throws Exception {
        // given
        String key = "projectPropertyKey";
@@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromBlankLocation() {
+   void testExecuteIncludingPropertyKeysFromBlankLocation() {
        // given
        String key = "projectPropertyKey";
        String value = "projectPropertyValue";
@@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
+   void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
        throws Exception {
        // given
        String key = "projectPropertyKey";
@@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
+   void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
        throws Exception {
        // given
        String key = "projectPropertyKey";
@@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
+   void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
        // given
        mojo.setQuiet(true);
        mojo.setIncludePropertyKeysFromFiles(new String[] {
@@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteIncludingPropertyKeysFromInvalidFile() {
+   void testExecuteIncludingPropertyKeysFromInvalidFile() {
        // given
        mojo.setIncludePropertyKeysFromFiles(new String[] {
            "invalid location"
@@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
+   void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
        // given
        mojo.setIncludeEnvironmentVariables(true);
 
@@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
+   void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
        // given
        String key = "systemPropertyKey";
        String value = "systemPropertyValue";
@@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
    }
 
    @Test
-   public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
+   void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
        throws Exception {
        // given
        String key = "systemPropertyKey ";

@@ -1,14 +0,0 @@
-
-package eu.dnetlib.dhp.application;
-
-import java.io.*;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-
-import com.google.common.collect.Maps;
-
-public class ApplicationUtils {
-
-}

@@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
            final StringWriter stringWriter = new StringWriter();
            IOUtils.copy(gis, stringWriter);
            return stringWriter.toString();
-       } catch (Throwable e) {
-           log.error("Wrong value to decompress:" + abstractCompressed);
-           throw new RuntimeException(e);
+       } catch (IOException e) {
+           log.error("Wrong value to decompress: {}", abstractCompressed);
+           throw new IllegalArgumentException(e);
        }
    }
 
-   public static String compressArgument(final String value) throws Exception {
+   public static String compressArgument(final String value) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        GZIPOutputStream gzip = new GZIPOutputStream(out);
        gzip.write(value.getBytes());
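
For context, a self-contained sketch of the gzip round trip these two methods implement. The method names and the Base64 wire format below are illustrative assumptions, not the class's actual API:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.StringWriter;
    import java.util.Base64;
    import java.util.zip.GZIPInputStream;
    import java.util.zip.GZIPOutputStream;

    import org.apache.commons.io.IOUtils;

    public class GzipArgumentSketch {

        // compress then Base64-encode, mirroring compressArgument (wire format assumed)
        static String compress(String value) throws IOException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
                gzip.write(value.getBytes());
            }
            return Base64.getEncoder().encodeToString(out.toByteArray());
        }

        // decode then gunzip; a bad payload surfaces as IllegalArgumentException,
        // matching the narrowed catch block above
        static String decompress(String compressed) {
            try (GZIPInputStream gis = new GZIPInputStream(
                new ByteArrayInputStream(Base64.getDecoder().decode(compressed)))) {
                StringWriter writer = new StringWriter();
                IOUtils.copy(gis, writer);
                return writer.toString();
            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        public static void main(String[] args) throws IOException {
            System.out.println(decompress(compress("hello"))); // hello
        }
    }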

@@ -9,9 +9,6 @@ public class OptionsParameter {
    private boolean paramRequired;
    private boolean compressed;
 
-   public OptionsParameter() {
-   }
-
    public String getParamName() {
        return paramName;
    }

@@ -34,7 +34,7 @@ public class ApiDescriptor {
        return params;
    }
 
-   public void setParams(final HashMap<String, String> params) {
+   public void setParams(final Map<String, String> params) {
        this.params = params;
    }

@@ -12,6 +12,9 @@ public class Constants {
    public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
 
+   private Constants() {
+   }
+
    static {
        accessRightsCoarMap.put("OPEN", "c_abf2");
        accessRightsCoarMap.put("RESTRICTED", "c_16ec");

@@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable {
                    .setDocumentationUrl(
                        value
                            .stream()
-                           .map(v -> v.getValue())
+                           .map(Field::getValue)
                            .collect(Collectors.toList())));
 
            Optional
@@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable {
                .setContactgroup(
                    Optional
                        .ofNullable(ir.getContactgroup())
-                       .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
+                       .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                        .orElse(null));
 
            out
                .setContactperson(
                    Optional
                        .ofNullable(ir.getContactperson())
-                       .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
+                       .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                        .orElse(null));
 
            out
                .setTool(
                    Optional
                        .ofNullable(ir.getTool())
-                       .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
+                       .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
                        .orElse(null));
 
            out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
@@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable {
 
        Optional
            .ofNullable(input.getAuthor())
-           .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
+           .ifPresent(
+               ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
 
        // I do not map Access Right UNKNOWN or OTHER
@@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable {
        if (oInst.isPresent()) {
            out
                .setInstance(
-                   oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
+                   oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
        }
 
@@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable {
            .stream()
            .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
            .collect(Collectors.toList());
-       if (iTitle.size() > 0) {
+       if (!iTitle.isEmpty()) {
            out.setMaintitle(iTitle.get(0).getValue());
        }
 
@@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable {
            .stream()
            .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
            .collect(Collectors.toList());
-       if (iTitle.size() > 0) {
+       if (!iTitle.isEmpty()) {
            out.setSubtitle(iTitle.get(0).getValue());
        }

@@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
    private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
        Path hdfsWritePath = new Path(outputPath);
-       FSDataOutputStream fsDataOutputStream = null;
        if (fileSystem.exists(hdfsWritePath)) {
            fileSystem.delete(hdfsWritePath, true);
        }
-       fsDataOutputStream = fileSystem.create(hdfsWritePath);
-
-       return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
+       return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
    }
 
    private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
        throws IOException {
        Path hdfsWritePath = new Path(outputPath);
-       FSDataOutputStream fsDataOutputStream = null;
        if (fileSystem.exists(hdfsWritePath)) {
            fileSystem.delete(hdfsWritePath, true);
        }
-       fsDataOutputStream = fileSystem.create(hdfsWritePath);
-
-       TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
-
-       RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
-           .listFiles(
-               new Path(inputPath), true);
-
-       while (fileStatusListIterator.hasNext()) {
-           writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
-       }
-
-       ar.close();
+       try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
+           fileSystem.create(hdfsWritePath).getWrappedStream())) {
+           RemoteIterator<LocatedFileStatus> iterator = fileSystem
+               .listFiles(
+                   new Path(inputPath), true);
+           while (iterator.hasNext()) {
+               writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
+           }
+       }
    }
 
    public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
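
The same try-with-resources pattern, sketched against the local filesystem instead of HDFS; the commons-compress calls are real, the path and content are placeholders:

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class TarSketch {

        static void writeOneEntry(String tarPath) throws IOException {
            try (TarArchiveOutputStream ar = new TarArchiveOutputStream(new FileOutputStream(tarPath))) {
                byte[] content = "hello".getBytes(StandardCharsets.UTF_8);
                TarArchiveEntry entry = new TarArchiveEntry("dir/hello.txt");
                entry.setSize(content.length);
                ar.putArchiveEntry(entry); // writes the entry header
                ar.write(content);         // writes the payload
                ar.closeArchiveEntry();    // pads to the tar record boundary
            } // the stream is closed even on failure, which the replaced code did not guarantee
        }
    }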

@@ -10,8 +10,6 @@ import java.util.Optional;
 import java.util.stream.StreamSupport;
 
 import org.apache.commons.lang3.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.bson.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoClientURI;
 import com.mongodb.QueryBuilder;
+import com.mongodb.client.FindIterable;
 import com.mongodb.client.MongoCollection;
 import com.mongodb.client.MongoDatabase;
@@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable {
 
        final String currentId = Optional
            .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
-           .map(r -> r.first())
+           .map(FindIterable::first)
            .map(d -> d.getString("currentId"))
            .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
@@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable {
        if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
            final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
            log.warn(err);
-           throw new RuntimeException(err);
+           throw new IllegalArgumentException(err);
        }
        return client.getDatabase(dbName);
    }
@@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable {
                String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
            log.warn(err);
            if (abortIfMissing) {
-               throw new RuntimeException(err);
+               throw new IllegalArgumentException(err);
            } else {
                return null;
            }

@@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
  */
 public class PacePerson {
 
-   private static final String UTF8 = "UTF-8";
    private List<String> name = Lists.newArrayList();
    private List<String> surname = Lists.newArrayList();
    private List<String> fullname = Lists.newArrayList();

@@ -5,6 +5,9 @@ import java.io.*;
 import java.io.IOException;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.http.HttpHeaders;
+import org.apache.http.entity.ContentType;
+
 import com.google.gson.Gson;
 
 import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
@@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
        this.deposition_id = deposition_id;
    }
 
-   public ZenodoAPIClient(String urlString, String access_token) throws IOException {
+   public ZenodoAPIClient(String urlString, String access_token) {
 
        this.urlString = urlString;
        this.access_token = access_token;
@@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(urlString)
-           .addHeader("Content-Type", "application/json") // add request headers
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .post(body)
            .build();
@@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(bucket + "/" + file_name)
-           .addHeader("Content-Type", "application/zip") // add request headers
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
            .build();
@@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(urlString + "/" + deposition_id)
-           .addHeader("Content-Type", "application/json") // add request headers
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .put(body)
            .build();
@@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(urlString + "/" + deposition_id + "/actions/newversion")
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .post(body)
            .build();
@@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(urlString)
-           .addHeader("Content-Type", "application/json") // add request headers
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .get()
            .build();
@@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
 
        Request request = new Request.Builder()
            .url(url)
-           .addHeader("Content-Type", "application/json") // add request headers
-           .addHeader("Authorization", "Bearer " + access_token)
+           .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
+           .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
            .get()
            .build();
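
A sketch of the header-constant usage adopted above, assuming OkHttp's Request.Builder as used throughout this class; the token value is a placeholder:

    import org.apache.http.HttpHeaders;
    import org.apache.http.entity.ContentType;

    import okhttp3.Request;

    public class ZenodoRequestSketch {

        // HttpHeaders.CONTENT_TYPE/AUTHORIZATION replace the bare "Content-Type"/"Authorization" literals
        static Request jsonGet(String url, String accessToken) {
            return new Request.Builder()
                .url(url)
                .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString())
                .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + accessToken)
                .get()
                .build();
        }
    }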

@@ -32,13 +32,13 @@ public class Creator {
 
    public static Creator newInstance(String name, String affiliation, String orcid) {
        Creator c = new Creator();
-       if (!(name == null)) {
+       if (name != null) {
            c.name = name;
        }
-       if (!(affiliation == null)) {
+       if (affiliation != null) {
            c.affiliation = affiliation;
        }
-       if (!(orcid == null)) {
+       if (orcid != null) {
            c.orcid = orcid;
        }

@@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
 
 import java.io.Serializable;
 
-import net.minidev.json.annotate.JsonIgnore;
-
 public class File implements Serializable {
    private String checksum;
    private String filename;
    private long filesize;
    private String id;
 
-   @JsonIgnore
-   // private Links links;
-
    public String getChecksum() {
        return checksum;
    }
@@ -46,13 +41,4 @@ public class File implements Serializable {
        this.id = id;
    }
 
-   // @JsonIgnore
-   // public Links getLinks() {
-   // return links;
-   // }
-   //
-   // @JsonIgnore
-   // public void setLinks(Links links) {
-   // this.links = links;
-   // }
 }

@@ -1,11 +1,11 @@
 
 package eu.dnetlib.dhp.common.rest;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.stream.Collectors;
 
 import org.apache.commons.io.IOUtils;
-import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.methods.HttpPost;
 import org.apache.http.client.methods.HttpUriRequest;
@@ -23,17 +23,20 @@ public class DNetRestClient {
 
    private static final ObjectMapper mapper = new ObjectMapper();
 
+   private DNetRestClient() {
+   }
+
    public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
        final HttpGet httpGet = new HttpGet(url);
        return doHTTPRequest(httpGet, clazz);
    }
 
-   public static String doGET(final String url) throws Exception {
+   public static String doGET(final String url) throws IOException {
        final HttpGet httpGet = new HttpGet(url);
        return doHTTPRequest(httpGet);
    }
 
-   public static <V> String doPOST(final String url, V objParam) throws Exception {
+   public static <V> String doPOST(final String url, V objParam) throws IOException {
        final HttpPost httpPost = new HttpPost(url);
 
        if (objParam != null) {
@@ -45,25 +48,25 @@ public class DNetRestClient {
        return doHTTPRequest(httpPost);
    }
 
-   public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception {
+   public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
        return mapper.readValue(doPOST(url, objParam), clazz);
    }
 
-   private static String doHTTPRequest(final HttpUriRequest r) throws Exception {
-       CloseableHttpClient client = HttpClients.createDefault();
+   private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
+       try (CloseableHttpClient client = HttpClients.createDefault()) {
 
-       log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
-       log
-           .info(
-               "request headers: {}",
-               Arrays
-                   .asList(r.getAllHeaders())
-                   .stream()
-                   .map(h -> h.getName() + ":" + h.getValue())
-                   .collect(Collectors.joining(",")));
+           log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
+           log
+               .info(
+                   "request headers: {}",
+                   Arrays
+                       .asList(r.getAllHeaders())
+                       .stream()
+                       .map(h -> h.getName() + ":" + h.getValue())
+                       .collect(Collectors.joining(",")));
 
-       CloseableHttpResponse response = client.execute(r);
-       return IOUtils.toString(response.getEntity().getContent());
+           return IOUtils.toString(client.execute(r).getEntity().getContent());
+       }
    }
 
    private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
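
The try-with-resources change above guarantees the underlying client (and its connection pool) is released even when the request fails; a minimal standalone sketch of the same pattern, with the URL handling illustrative only:

    import java.io.IOException;

    import org.apache.commons.io.IOUtils;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;

    public class RestClientSketch {

        static String get(String url) throws IOException {
            try (CloseableHttpClient client = HttpClients.createDefault()) {
                // the client is closed when this block exits, even on exception
                return IOUtils.toString(client.execute(new HttpGet(url)).getEntity().getContent());
            }
        }
    }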

@@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
    }
 
    public VocabularyTerm getTerm(final String id) {
-       return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
+       return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
    }
 
    protected void addTerm(final String id, final String name) {
@@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
            .ofNullable(getTermBySynonym(syn))
            .map(term -> getTermAsQualifier(term.getId()))
            .orElse(null);
-       // .orElse(OafMapperUtils.unknown(getId(), getName()));
    }
 }

@@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
            }
 
            vocs.addTerm(vocId, termId, termName);
-           // vocs.addSynonyms(vocId, termId, termId);
        }
    }
 
@@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable {
            final String syn = arr[2].trim();
 
            vocs.addSynonyms(vocId, termId, syn);
-           // vocs.addSynonyms(vocId, termId, termId);
        }
    }
 
@@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable {
            .getTerms()
            .values()
            .stream()
-           .map(t -> t.getId())
+           .map(VocabularyTerm::getId)
            .collect(Collectors.toCollection(HashSet::new));
    }
 
@@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable {
        return Optional
            .ofNullable(vocId)
            .map(String::toLowerCase)
-           .map(id -> vocs.containsKey(id))
+           .map(vocs::containsKey)
            .orElse(false);
    }
 
    private void addSynonyms(final String vocId, final String termId, final String syn) {
        String id = Optional
            .ofNullable(vocId)
-           .map(s -> s.toLowerCase())
+           .map(String::toLowerCase)
            .orElseThrow(
-               () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
+               () -> new IllegalArgumentException(
+                   String
+                       .format(
+                           "empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
        Optional
            .ofNullable(vocs.get(id))
            .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))

@@ -2,7 +2,6 @@
 package eu.dnetlib.dhp.message;
 
 import java.io.Serializable;
-import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
@@ -10,8 +9,8 @@ public class Message implements Serializable {
 
    private static final long serialVersionUID = 401753881204524893L;
 
-   public static String CURRENT_PARAM = "current";
-   public static String TOTAL_PARAM = "total";
+   public static final String CURRENT_PARAM = "current";
+   public static final String TOTAL_PARAM = "total";
 
    private MessageType messageType;

@@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
 import java.text.Normalizer;
 import java.util.*;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 import org.apache.commons.lang3.StringUtils;
 
@@ -19,6 +18,9 @@ public class AuthorMerger {
 
    private static final Double THRESHOLD = 0.95;
 
+   private AuthorMerger() {
+   }
+
    public static List<Author> merge(List<List<Author>> authors) {
 
        authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
@@ -36,7 +38,8 @@ public class AuthorMerger {
    public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
        int pa = countAuthorsPids(a);
        int pb = countAuthorsPids(b);
-       List<Author> base, enrich;
+       List<Author> base;
+       List<Author> enrich;
        int sa = authorsSize(a);
        int sb = authorsSize(b);
 
@@ -62,7 +65,7 @@ public class AuthorMerger {
        // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
        final Map<String, Author> basePidAuthorMap = base
            .stream()
-           .filter(a -> a.getPid() != null && a.getPid().size() > 0)
+           .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
            .flatMap(
                a -> a
                    .getPid()
@@ -74,7 +77,7 @@ public class AuthorMerger {
        // <pid, Author> (list of pid that are missing in the other list)
        final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
            .stream()
-           .filter(a -> a.getPid() != null && a.getPid().size() > 0)
+           .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
            .flatMap(
                a -> a
                    .getPid()
@@ -117,9 +120,9 @@ public class AuthorMerger {
    }
 
    public static String pidToComparableString(StructuredProperty pid) {
-       return (pid.getQualifier() != null
-           ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
-           : "")
+       final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
+           : "";
+       return (pid.getQualifier() != null ? classid : "")
            + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
    }

@@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
 /** Created by sandro on 9/29/16. */
 public class VtdUtilityParser {
 
+   private VtdUtilityParser() {
+   }
+
    public static List<Node> getTextValuesWithAttributes(
        final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
        throws VtdException {

@@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
                    r
                        .getAuthor()
                        .stream()
-                       .filter(a -> Objects.nonNull(a))
+                       .filter(Objects::nonNull)
                        .filter(a -> StringUtils.isNotBlank(a.getFullname()))
                        .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
                        .collect(Collectors.toList()));

@@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*;
 
 public class OafMapperUtils {
 
+   private OafMapperUtils() {
+   }
+
    public static Oaf merge(final Oaf left, final Oaf right) {
        if (ModelSupport.isSubClass(left, OafEntity.class)) {
            return mergeEntities((OafEntity) left, (OafEntity) right);
        } else if (ModelSupport.isSubClass(left, Relation.class)) {
            ((Relation) left).mergeFrom((Relation) right);
        } else {
-           throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
+           throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
        }
        return left;
    }
@@ -38,7 +41,7 @@ public class OafMapperUtils {
        } else if (ModelSupport.isSubClass(left, Project.class)) {
            left.mergeFrom(right);
        } else {
-           throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
+           throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
        }
        return left;
    }
@@ -62,7 +65,7 @@ public class OafMapperUtils {
 
    public static List<KeyValue> listKeyValues(final String... s) {
        if (s.length % 2 > 0) {
-           throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
+           throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
        }
 
        final List<KeyValue> list = new ArrayList<>();
@@ -88,7 +91,7 @@ public class OafMapperUtils {
            .stream(values)
            .map(v -> field(v, info))
            .filter(Objects::nonNull)
-           .filter(distinctByKey(f -> f.getValue()))
+           .filter(distinctByKey(Field::getValue))
            .collect(Collectors.toList());
    }
 
@@ -97,7 +100,7 @@ public class OafMapperUtils {
            .stream()
            .map(v -> field(v, info))
            .filter(Objects::nonNull)
-           .filter(distinctByKey(f -> f.getValue()))
+           .filter(distinctByKey(Field::getValue))
            .collect(Collectors.toList());
    }
 
@@ -342,10 +345,10 @@ public class OafMapperUtils {
        if (instanceList != null) {
            final Optional<AccessRight> min = instanceList
                .stream()
-               .map(i -> i.getAccessright())
+               .map(Instance::getAccessright)
                .min(new AccessRightComparator<>());
 
-           final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier();
+           final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
 
            if (StringUtils.isBlank(rights.getClassid())) {
                rights.setClassid(UNKNOWN);
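
The distinctByKey predicate referenced above is not shown in this diff; a common implementation of such a helper (an assumption about this codebase, not its verified source) is:

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.Function;
    import java.util.function.Predicate;

    public class DistinctByKeySketch {

        // stateful predicate: keeps an element only the first time its key is seen
        static <T> Predicate<T> distinctByKey(Function<? super T, ?> keyExtractor) {
            Map<Object, Boolean> seen = new ConcurrentHashMap<>();
            return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
        }
    }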

@@ -34,6 +34,9 @@ public class DHPUtils {
 
    private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
 
+   private DHPUtils() {
+   }
+
    public static Seq<String> toSeq(List<String> list) {
        return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
    }
@@ -44,7 +47,7 @@ public class DHPUtils {
            md.update(s.getBytes(StandardCharsets.UTF_8));
            return new String(Hex.encodeHex(md.digest()));
        } catch (final Exception e) {
-           System.err.println("Error creating id");
+           log.error("Error creating id from {}", s);
            return null;
        }
    }
@@ -53,33 +56,6 @@ public class DHPUtils {
        return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
    }
 
-   public static String compressString(final String input) {
-       try (ByteArrayOutputStream out = new ByteArrayOutputStream();
-           Base64OutputStream b64os = new Base64OutputStream(out)) {
-           GZIPOutputStream gzip = new GZIPOutputStream(b64os);
-           gzip.write(input.getBytes(StandardCharsets.UTF_8));
-           gzip.close();
-           return out.toString();
-       } catch (Throwable e) {
-           return null;
-       }
-   }
-
-   public static String decompressString(final String input) {
-       byte[] byteArray = Base64.decodeBase64(input.getBytes());
-       int len;
-       try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
-           ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
-           byte[] buffer = new byte[1024];
-           while ((len = gis.read(buffer)) != -1) {
-               bos.write(buffer, 0, len);
-           }
-           return bos.toString();
-       } catch (Exception e) {
-           return null;
-       }
-   }
-
    public static String getJPathString(final String jsonPath, final String json) {
        try {
            Object o = JsonPath.read(json, jsonPath);
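
A self-contained version of the md5 helper visible above (commons-codec Hex), showing the value plugged into generateIdentifier's "%s::%s" template:

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;

    import org.apache.commons.codec.binary.Hex;

    public class Md5Sketch {

        static String md5(String s) throws Exception {
            MessageDigest md = MessageDigest.getInstance("MD5");
            md.update(s.getBytes(StandardCharsets.UTF_8));
            return new String(Hex.encodeHex(md.digest())); // 32 lowercase hex chars
        }

        public static void main(String[] args) throws Exception {
            System.out.println(md5("hello"));               // 5d41402abc4b2a76b9719d911017c592
            System.out.println("nsPrefix::" + md5("hello")); // generateIdentifier-style id
        }
    }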

@@ -18,13 +18,16 @@ public class ISLookupClientFactory {
 
    private static final int requestTimeout = 60000 * 10;
    private static final int connectTimeout = 60000 * 10;
 
+   private ISLookupClientFactory() {
+   }
+
    public static ISLookUpService getLookUpService(final String isLookupUrl) {
        return getServiceStub(ISLookUpService.class, isLookupUrl);
    }
 
    @SuppressWarnings("unchecked")
    private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
-       log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint));
+       log.info("creating {} stub from {}", clazz.getName(), endpoint);
        final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
        jaxWsProxyFactory.setServiceClass(clazz);
        jaxWsProxyFactory.setAddress(endpoint);
@@ -38,12 +41,10 @@ public class ISLookupClientFactory {
 
            log
                .info(
-                   String
-                       .format(
-                           "setting connectTimeout to %s, requestTimeout to %s for service %s",
-                           connectTimeout,
-                           requestTimeout,
-                           clazz.getCanonicalName()));
+                   "setting connectTimeout to {}, requestTimeout to {} for service {}",
+                   connectTimeout,
+                   requestTimeout,
+                   clazz.getCanonicalName());
 
            policy.setConnectionTimeout(connectTimeout);
            policy.setReceiveTimeout(requestTimeout);

@@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
 
 public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
 
-   public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
+   public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
 
    public abstract String getName();

@@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
 
    @Override
    public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
-       if (arguments == null | arguments.length == 0) {
+       if (arguments == null || arguments.length == 0) {
            return new StringValue("");
        }
        final Item item = arguments[0].head();
@@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
        for (String format : dateFormats) {
            try {
                c.setTime(new SimpleDateFormat(format).parse(s));
-               String year = String.valueOf(c.get(Calendar.YEAR));
-               return year;
+               return String.valueOf(c.get(Calendar.YEAR));
            } catch (ParseException e) {
            }
        }
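
Why the '|' to '||' fix above matters: the bitwise form does not short-circuit, so both operands are evaluated and the original guard dereferenced a null array before the null check could take effect. A minimal reproduction:

    public class ShortCircuitSketch {

        static boolean isEmptyUnsafe(Object[] args) {
            return args == null | args.length == 0; // NPE when args == null: both sides evaluate
        }

        static boolean isEmptySafe(Object[] args) {
            return args == null || args.length == 0; // short-circuits: right side skipped when null
        }

        public static void main(String[] a) {
            System.out.println(isEmptySafe(null));   // true
            System.out.println(isEmptyUnsafe(null)); // throws NullPointerException
        }
    }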

@@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
 
    @Override
    public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
-       if (arguments == null | arguments.length == 0) {
+       if (arguments == null || arguments.length == 0) {
            return new StringValue(BLANK);
        }
        String s = arguments[0].head().getStringValue();

@@ -1,6 +1,8 @@
 
 package eu.dnetlib.dhp.utils.saxon;
 
+import static org.apache.commons.lang3.StringUtils.isNotBlank;
+
 import org.apache.commons.lang3.StringUtils;
 
 import net.sf.saxon.expr.XPathContext;
@@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
 
        final String s1 = getValue(arguments[0]);
        final String s2 = getValue(arguments[1]);
 
-       return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
+       final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
+       return new StringValue(value);
    }
 
    private String getValue(final Sequence arg) throws XPathException {

@@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
 
 public class SaxonTransformerFactory {
 
+   private SaxonTransformerFactory() {
+   }
+
    /**
     * Creates the index record transformer from the given XSLT
     *

@@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.Test;
 
-public class ArgumentApplicationParserTest {
+class ArgumentApplicationParserTest {
 
    @Test
-   public void testParseParameter() throws Exception {
+   void testParseParameter() throws Exception {
        final String jsonConfiguration = IOUtils
            .toString(
                this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));

@@ -21,13 +21,13 @@ public class HdfsSupportTest {
    class Remove {
 
        @Test
-       public void shouldThrowARuntimeExceptionOnError() {
+       void shouldThrowARuntimeExceptionOnError() {
            // when
            assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
        }
 
        @Test
-       public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
+       void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
            // when
            HdfsSupport.remove(tempDir.toString(), new Configuration());
 
@@ -36,7 +36,7 @@ public class HdfsSupportTest {
        }
 
        @Test
-       public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
+       void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
            // given
            Path file = Files.createTempFile(tempDir, "p", "s");
 
@@ -52,13 +52,13 @@ public class HdfsSupportTest {
    class ListFiles {
 
        @Test
-       public void shouldThrowARuntimeExceptionOnError() {
+       void shouldThrowARuntimeExceptionOnError() {
            // when
            assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
        }
 
        @Test
-       public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
+       void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
            Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
            Path subDir2 = Files.createTempDirectory(tempDir, "list_me");

@@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
 
 import org.junit.jupiter.api.Test;
 
-public class PacePersonTest {
+class PacePersonTest {
 
    @Test
-   public void pacePersonTest1() {
+   void pacePersonTest1() {
 
        PacePerson p = new PacePerson("Artini, Michele", false);
        assertEquals("Artini", p.getSurnameString());
@@ -17,7 +17,7 @@ public class PacePersonTest {
    }
 
    @Test
-   public void pacePersonTest2() {
+   void pacePersonTest2() {
        PacePerson p = new PacePerson("Michele G. Artini", false);
        assertEquals("Artini, Michele G.", p.getNormalisedFullname());
        assertEquals("Michele G", p.getNameString());

@@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
    class RunWithSparkSession {
 
        @Test
-       public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
+       @SuppressWarnings("unchecked")
+       void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
            throws Exception {
            // given
            SparkSession spark = mock(SparkSession.class);
@@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
        }
 
        @Test
-       public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
+       @SuppressWarnings("unchecked")
+       void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
            throws Exception {
            // given
            SparkSession spark = mock(SparkSession.class);

View File

@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@Disabled @Disabled
public class ZenodoAPIClientTest { class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = ""; private final String ACCESS_TOKEN = "";
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
private final String depositionId = "674915"; private final String depositionId = "674915";
@Test @Test
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewDeposition() throws IOException { void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionNewName() throws IOException, MissingConceptDoiException { void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionOldName() throws IOException, MissingConceptDoiException { void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);

View File

@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2; import scala.Tuple2;
public class AuthorMergerTest { class AuthorMergerTest {
private String publicationsBasePath; private String publicationsBasePath;
@ -43,7 +43,7 @@ public class AuthorMergerTest {
} }
@Test @Test
public void mergeTest() { // used in the dedup: threshold set to 0.95 void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1 : authors) { for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1) + 1)); System.out.println("List " + (authors.indexOf(authors1) + 1));

View File

@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import me.xuender.unidecode.Unidecode; import me.xuender.unidecode.Unidecode;
public class OafMapperUtilsTest { class OafMapperUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@ -42,7 +42,7 @@ public class OafMapperUtilsTest {
} }
@Test @Test
public void testDateValidation() { void testDateValidation() {
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
@ -147,44 +147,46 @@ public class OafMapperUtilsTest {
} }
@Test @Test
public void testDate() { void testDate() {
System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
} }
@Test @Test
public void testMergePubs() throws IOException { void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class); Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(p1.getCollectedfrom().size(), 1); assertEquals(1, p1.getCollectedfrom().size());
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(d2.getCollectedfrom().size(), 1); assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p1, d2) .mergeResults(p1, d2)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
assertEquals(p2.getCollectedfrom().size(), 1); assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(d1.getCollectedfrom().size(), 1); assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p2, d1) .mergeResults(p2, d1)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
} }
protected HashSet<String> cfId(List<KeyValue> collectedfrom) { protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
} }
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException { protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
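
The assertion rewrites above follow JUnit's parameter order, expected value first, then actual. With the arguments reversed the test passes or fails identically, but a failure message labels the two values the wrong way round (the same hunk also replaces a bare println in testDate with an assertNotNull, so the test now asserts something). A small sketch of the difference:

    import static org.junit.jupiter.api.Assertions.assertEquals;

    import java.util.Arrays;
    import java.util.List;

    import org.junit.jupiter.api.Test;

    class AssertionOrderTest {

        @Test
        void expectedComesFirst() {
            List<String> collectedfrom = Arrays.asList("10|openaire____::crossref");
            // assertEquals(collectedfrom.size(), 1) compiles and behaves the same,
            // but on failure it would report expected and actual swapped.
            assertEquals(1, collectedfrom.size());
        }
    }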

View File

@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class RelationMapperTest { class RelationMapperTest {
@Test @Test
public void testLoadRels() throws Exception { void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load(); RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println); relationMapper.keySet().forEach(System.out::println);

View File

@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.*; import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Triple; import org.apache.commons.lang3.tuple.Triple;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.actionmanager.set.ActionManagerSet;
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class ISClient implements Serializable { public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); private static final Logger log = LoggerFactory.getLogger(ISClient.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private final ISLookUpService isLookup; private final transient ISLookUpService isLookup;
public ISClient(String isLookupUrl) { public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
.map( .map(
sets -> sets sets -> sets
.stream() .stream()
.map(set -> parseSetInfo(set)) .map(ISClient::parseSetInfo)
.filter(t -> ids.contains(t.getLeft())) .filter(t -> ids.contains(t.getLeft()))
.map(t -> buildDirectory(basePath, t)) .map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList())) .collect(Collectors.toList()))
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
} }
} }
private Triple<String, String, String> parseSetInfo(String set) { private static Triple<String, String, String> parseSetInfo(String set) {
try { try {
Document doc = new SAXReader().read(new StringReader(set)); final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document doc = reader.read(new StringReader(set));
return Triple return Triple
.of( .of(
doc.valueOf("//SET/@id"), doc.valueOf("//SET/@id"),
doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@directory"),
doc.valueOf("//SET/@latest")); doc.valueOf("//SET/@latest"));
} catch (DocumentException e) { } catch (DocumentException | SAXException e) {
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
} }
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + propertyName
+ "']/@value/string()"; + "']/@value/string()";
log.debug("quering for service property: " + q); log.debug("quering for service property: {}", q);
try { try {
final List<String> value = isLookup.quickSearchProfile(q); final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value); return Iterables.getOnlyElement(value);
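
Both here in parseSetInfo and in GenerateNativeStoreSparkJob further down, the SAXReader gains the disallow-doctype-decl feature, which rejects DTDs and so blocks XML External Entity (XXE) payloads in untrusted input; setFeature throws SAXException, hence the widened catch. A minimal sketch of the hardened parsing, assuming an untrusted XML string:

    import java.io.StringReader;

    import org.dom4j.Document;
    import org.dom4j.DocumentException;
    import org.dom4j.io.SAXReader;
    import org.xml.sax.SAXException;

    public class SecureXmlParsing {

        public static Document parse(String xml) {
            try {
                final SAXReader reader = new SAXReader();
                // Refusing DOCTYPE declarations disables external entity
                // resolution, the vector used by XXE attacks.
                reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
                return reader.read(new StringReader(xml));
            } catch (DocumentException | SAXException e) {
                throw new IllegalStateException(e);
            }
        }
    }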

View File

@ -62,6 +62,7 @@ public class MergeAndGet {
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
@SuppressWarnings("unchecked")
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {

View File

@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob {
.orElse(true); .orElse(true);
logger.info("shouldGroupById: {}", shouldGroupById); logger.info("shouldGroupById: {}", shouldGroupById);
@SuppressWarnings("unchecked")
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName); Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
@SuppressWarnings("unchecked")
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName); Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
return spark return spark
.read() .read()
.parquet(path) .parquet(path)
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING()) .map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
.map( .map(
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value), (MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
Encoders.bean(actionPayloadClazz)); Encoders.bean(actionPayloadClazz));
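
These hunks attach @SuppressWarnings("unchecked") as close as possible to the unchecked operation, a single declaration or method rather than a whole class: Class.forName returns Class<?>, so the cast to a parameterized type is inherently unchecked. A sketch of the narrow-scope pattern, with a hypothetical target type:

    import java.util.List;

    public class NarrowSuppression {

        public static void main(String[] args) throws ClassNotFoundException {
            // The cast is unchecked by nature of erasure; annotating only this
            // declaration keeps the suppression from hiding other warnings.
            @SuppressWarnings("unchecked")
            Class<? extends List<String>> clazz =
                (Class<? extends List<String>>) Class.forName("java.util.ArrayList");
            System.out.println(clazz.getName());
        }
    }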

View File

@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
private ISClient isClient; private ISClient isClient;
@Test @Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
// given // given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output"); Path outputDir = workingDir.resolve("output");

View File

@ -20,7 +20,7 @@ public class MergeAndGetTest {
class MergeFromAndGetStrategy { class MergeFromAndGetStrategy {
@Test @Test
public void shouldThrowForOafAndOaf() { void shouldThrowForOafAndOaf() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -33,7 +33,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndRelation() { void shouldThrowForOafAndRelation() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -46,7 +46,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndOafEntity() { void shouldThrowForOafAndOafEntity() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -59,7 +59,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOaf() { void shouldThrowForRelationAndOaf() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -72,7 +72,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -85,7 +85,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForRelationAndRelation() { void shouldBehaveProperlyForRelationAndRelation() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -101,7 +101,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOaf() { void shouldThrowForOafEntityAndOaf() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -114,7 +114,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -127,7 +127,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given // given
class OafEntitySub1 extends OafEntity { class OafEntitySub1 extends OafEntity {
} }
@ -145,7 +145,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForOafEntityAndOafEntity() { void shouldBehaveProperlyForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -165,7 +165,7 @@ public class MergeAndGetTest {
class SelectNewerAndGetStrategy { class SelectNewerAndGetStrategy {
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -178,7 +178,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -191,7 +191,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndResult() { void shouldThrowForOafEntityAndResult() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Result b = mock(Result.class); Result b = mock(Result.class);
@ -204,7 +204,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given // given
// real types must be used because subclass-superclass resolution does not work for // real types must be used because subclass-superclass resolution does not work for
// mocks // mocks
@ -221,7 +221,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnLeftForOafEntityAndOafEntity() { void shouldShouldReturnLeftForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(1L); when(a.getLastupdatetimestamp()).thenReturn(1L);
@ -238,7 +238,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnRightForOafEntityAndOafEntity() { void shouldShouldReturnRightForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(2L); when(a.getLastupdatetimestamp()).thenReturn(2L);

View File

@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
class Main { class Main {
@Test @Test
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
// given // given
Class<Relation> rowClazz = Relation.class; Class<Relation> rowClazz = Relation.class;
Class<OafEntity> actionPayloadClazz = OafEntity.class; Class<OafEntity> actionPayloadClazz = OafEntity.class;
@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable( void shouldPromoteActionPayloadForGraphTable(
MergeAndGet.Strategy strategy, MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz, Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) Class<? extends Oaf> actionPayloadClazz)

View File

@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
class JoinTableWithActionPayloadAndMerge { class JoinTableWithActionPayloadAndMerge {
@Test @Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given // given
class OafImpl extends Oaf { class OafImpl extends Oaf {
} }
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
class GroupTableByIdAndMerge { class GroupTableByIdAndMerge {
@Test @Test
public void shouldRunProperly() { void shouldRunProperly() {
// given // given
String id1 = "id1"; String id1 = "id1";
String id2 = "id2"; String id2 = "id2";

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class CollectAndSave implements Serializable { public class CollectAndSave implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static <I extends Result> void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
CollectAndSave.class Objects
.getResourceAsStream( .requireNonNull(
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); CollectAndSave.class
.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
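
Wrapping getResourceAsStream in Objects.requireNonNull makes a missing classpath resource fail immediately, at the load site, instead of surfacing as an anonymous NullPointerException inside IOUtils.toString. A minimal sketch, assuming a hypothetical resource path:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.Objects;

    import org.apache.commons.io.IOUtils;

    public class ResourceLoading {

        public static String readResource(String path) throws IOException {
            // Fails fast, naming the missing resource, rather than passing a
            // null InputStream downstream.
            return IOUtils
                .toString(
                    Objects.requireNonNull(
                        ResourceLoading.class.getResourceAsStream(path),
                        path + " not found on classpath"),
                    StandardCharsets.UTF_8);
        }
    }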

View File

@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath, private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
String bipScorePath, Class<I> inputClazz) { String bipScorePath, Class<I> inputClazz) {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
.textFile(bipScorePath) .textFile(bipScorePath)
@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
return bs; return bs;
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
System.out.println(bipScores.count());
Dataset<I> results = readPath(spark, inputPath, inputClazz); Dataset<I> results = readPath(spark, inputPath, inputClazz);
results.createOrReplaceTempView("result"); results.createOrReplaceTempView("result");
@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
ret.setId(value._2().getId()); ret.setId(value._2().getId());
return ret; return ret;
}, Encoders.bean(BipScore.class)) }, Encoders.bean(BipScore.class))
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING()) .groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> { .mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
Result ret = new Result(); Result ret = new Result();
ret.setDataInfo(getDataInfo()); ret.setDataInfo(getDataInfo());
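
Replacing value -> value.getId() with the method reference BipScore::getId is behavior-preserving; the cast to Spark's MapFunction is still needed to pick the Java-specific overload of groupByKey. A runnable sketch of the same groupByKey/mapGroups shape on toy data:

    import java.util.Arrays;

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.api.java.function.MapGroupsFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SparkSession;

    public class GroupByKeySketch {

        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate();

            Dataset<String> words = spark.createDataset(Arrays.asList("a", "b", "a"), Encoders.STRING());

            // The MapFunction cast selects the Java overload of groupByKey;
            // the key extractor itself is just a method reference.
            Dataset<String> counts = words
                .groupByKey((MapFunction<String, String>) String::toUpperCase, Encoders.STRING())
                .mapGroups((MapGroupsFunction<String, String, String>) (key, it) -> {
                    int n = 0;
                    while (it.hasNext()) {
                        it.next();
                        n++;
                    }
                    return key + ":" + n;
                }, Encoders.STRING());

            counts.show();
            spark.stop();
        }
    }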

View File

@ -171,26 +171,23 @@ public class PrepareProgramme {
} }
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
if (!a.getLanguage().equals("en")) { if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) {
if (b.getLanguage().equalsIgnoreCase("en")) { a.setTitle(b.getTitle());
a.setTitle(b.getTitle()); a.setLanguage(b.getLanguage());
a.setLanguage(b.getLanguage());
}
} }
if (StringUtils.isEmpty(a.getShortTitle())) { if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) {
if (!StringUtils.isEmpty(b.getShortTitle())) { a.setShortTitle(b.getShortTitle());
a.setShortTitle(b.getShortTitle());
}
} }
return a; return a;
} }
@SuppressWarnings("unchecked")
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) { private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
Object[] codedescription = h2020Programmes Object[] codedescription = h2020Programmes
.map( .map(
value -> new Tuple2<>(value.getCode(), value -> new Tuple2<>(value.getCode(),
new Tuple2<String, String>(value.getTitle(), value.getShortTitle()))) new Tuple2<>(value.getTitle(), value.getShortTitle())))
.collect() .collect()
.toArray(); .toArray();
@ -216,7 +213,7 @@ public class PrepareProgramme {
String[] tmp = ent.split("\\."); String[] tmp = ent.split("\\.");
if (tmp.length <= 2) { if (tmp.length <= 2) {
if (StringUtils.isEmpty(entry._2()._2())) { if (StringUtils.isEmpty(entry._2()._2())) {
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1())); map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1()));
} else { } else {
map.put(entry._1(), entry._2()); map.put(entry._1(), entry._2());
} }

View File

@ -29,7 +29,7 @@ import scala.Tuple2;
*/ */
public class PrepareProjects { public class PrepareProjects {
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {

View File

@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient;
*/ */
public class ReadProjectsFromDB implements Closeable { public class ReadProjectsFromDB implements Closeable {
private final DbClient dbClient;
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
private static final String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
private final DbClient dbClient;
private final Configuration conf; private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final static String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable {
} }
} }
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) throws Exception { public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) {
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeProject);
dbClient.processResults(sql, consumer); dbClient.processResults(sql, consumer);
} }
@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable {
public ReadProjectsFromDB( public ReadProjectsFromDB(
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception { throws IOException {
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.conf = new Configuration(); this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); this.conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
} }
@Override @Override
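
The constructor refactoring here, and again in ReadCSV and ReadExcel below, removes the dead "declare null, assign later" FSDataOutputStream and declares the stream where it is created. The overall delete-then-create shape, as a sketch:

    import java.io.BufferedWriter;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HdfsWriterSketch {

        public static BufferedWriter openWriter(String nameNode, String path) throws IOException {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", nameNode);
            FileSystem fs = FileSystem.get(conf);
            Path out = new Path(path);
            // Replace any previous run's output, then open a fresh stream;
            // the stream variable exists only once there is a stream.
            if (fs.exists(out)) {
                fs.delete(out, false);
            }
            FSDataOutputStream fos = fs.create(out);
            return new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
        }
    }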

View File

@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Classification;
import eu.dnetlib.dhp.schema.oaf.H2020Programme; import eu.dnetlib.dhp.schema.oaf.H2020Programme;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;
@ -47,13 +48,10 @@ import scala.Tuple2;
* *
* To produce one single entry for each project code a step of grouping is needed: each project can be associated to more * than one programme.
* than one programme. * than one programme.
*
*
*/ */
public class SparkAtomicActionJob { public class SparkAtomicActionJob {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final HashMap<String, String> programmeMap = new HashMap<>();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -137,7 +135,6 @@ public class SparkAtomicActionJob {
h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setClassification(csvProgramme.getClassification());
h2020classification.setH2020Programme(pm); h2020classification.setH2020Programme(pm);
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
pp.setH2020classification(Arrays.asList(h2020classification)); pp.setH2020classification(Arrays.asList(h2020classification));
return pp; return pp;
@ -152,20 +149,16 @@ public class SparkAtomicActionJob {
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> { .map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
Optional<EXCELTopic> op = Optional.ofNullable(p._2()); Optional<EXCELTopic> op = Optional.ofNullable(p._2());
Project rp = p._1(); Project rp = p._1();
if (op.isPresent()) { op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
rp.setH2020topicdescription(op.get().getTitle());
}
return rp; return rp;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.groupByKey( .groupByKey(
(MapFunction<Project, String>) p -> p.getId(), (MapFunction<Project, String>) OafEntity::getId,
Encoders.STRING()) Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> { .mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
Project first = it.next(); Project first = it.next();
it.forEachRemaining(p -> { it.forEachRemaining(first::mergeFrom);
first.mergeFrom(p);
});
return first; return first;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.toJavaRDD() .toJavaRDD()
@ -189,12 +182,6 @@ public class SparkAtomicActionJob {
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
} }
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
// String[] tmp = classification.split(" \\| ");
//
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
// }
public static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) { SparkSession spark, String inputPath, Class<R> clazz) {
return spark return spark
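
op.ifPresent(...) collapses the if (op.isPresent()) { ... op.get() ... } block into one expression and removes the unguarded get. A tiny sketch, with a hypothetical setter standing in for setH2020topicdescription:

    import java.util.Optional;

    public class IfPresentSketch {

        private String topicDescription;

        public void setTopicDescription(String d) {
            this.topicDescription = d;
        }

        public static void main(String[] args) {
            IfPresentSketch rp = new IfPresentSketch();
            Optional<String> topic = Optional.ofNullable(System.getenv("TOPIC"));
            // Runs the consumer only when a value is present; no isPresent/get pair.
            topic.ifPresent(rp::setTopicDescription);
        }
    }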

View File

@ -32,12 +32,14 @@ public class CSVParser {
final Set<String> headers = parser.getHeaderMap().keySet(); final Set<String> headers = parser.getHeaderMap().keySet();
Class<?> clazz = Class.forName(classForName); Class<?> clazz = Class.forName(classForName);
for (CSVRecord csvRecord : parser.getRecords()) { for (CSVRecord csvRecord : parser.getRecords()) {
final Object cc = clazz.newInstance();
@SuppressWarnings("unchecked")
final R cc = (R) clazz.newInstance();
for (String header : headers) { for (String header : headers) {
FieldUtils.writeField(cc, header, csvRecord.get(header), true); FieldUtils.writeField(cc, header, csvRecord.get(header), true);
} }
ret.add((R) cc); ret.add(cc);
} }
return ret; return ret;

View File

@ -26,52 +26,52 @@ public class EXCELParser {
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
InvalidFormatException { InvalidFormatException {
OPCPackage pkg = OPCPackage.open(file); try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
XSSFWorkbook wb = new XSSFWorkbook(pkg);
XSSFSheet sheet = wb.getSheet(sheetName); XSSFSheet sheet = wb.getSheet(sheetName);
if (sheetName == null) {
throw new RuntimeException("Sheet name " + sheetName + " not present in current file");
}
List<R> ret = new ArrayList<>();
DataFormatter dataFormatter = new DataFormatter();
Iterator<Row> rowIterator = sheet.rowIterator();
List<String> headers = new ArrayList<>();
int count = 0;
while (rowIterator.hasNext()) {
Row row = rowIterator.next();
if (count == 0) {
Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();
headers.add(dataFormatter.formatCellValue(cell));
}
} else {
Class<?> clazz = Class.forName(classForName);
final Object cc = clazz.newInstance();
for (int i = 0; i < headers.size(); i++) {
Cell cell = row.getCell(i);
FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true);
}
EXCELTopic et = (EXCELTopic) cc;
if (StringUtils.isNotBlank(et.getRcn())) {
ret.add((R) cc);
}
if (sheet == null) {
throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
} }
count += 1; List<R> ret = new ArrayList<>();
}
return ret; DataFormatter dataFormatter = new DataFormatter();
Iterator<Row> rowIterator = sheet.rowIterator();
List<String> headers = new ArrayList<>();
int count = 0;
while (rowIterator.hasNext()) {
Row row = rowIterator.next();
if (count == 0) {
Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) {
Cell cell = cellIterator.next();
headers.add(dataFormatter.formatCellValue(cell));
}
} else {
Class<?> clazz = Class.forName(classForName);
final Object cc = clazz.newInstance();
for (int i = 0; i < headers.size(); i++) {
Cell cell = row.getCell(i);
FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true);
}
EXCELTopic et = (EXCELTopic) cc;
if (StringUtils.isNotBlank(et.getRcn())) {
ret.add((R) cc);
}
}
count += 1;
}
return ret;
}
} }
} }
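
The EXCELParser rewrite wraps OPCPackage and XSSFWorkbook in try-with-resources, so both are closed, in reverse declaration order, even when parsing throws. The skeleton of that pattern, as a sketch:

    import java.io.IOException;
    import java.io.InputStream;

    import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
    import org.apache.poi.openxml4j.opc.OPCPackage;
    import org.apache.poi.xssf.usermodel.XSSFSheet;
    import org.apache.poi.xssf.usermodel.XSSFWorkbook;

    public class WorkbookSketch {

        public static int countRows(InputStream file, String sheetName)
            throws IOException, InvalidFormatException {
            // The workbook is closed before the package it was opened from,
            // with no finally block to maintain by hand.
            try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
                XSSFSheet sheet = wb.getSheet(sheetName);
                return sheet == null ? 0 : sheet.getPhysicalNumberOfRows();
            }
        }
    }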

View File

@ -26,7 +26,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2;
*/ */
public class ReadCSV implements Closeable { public class ReadCSV implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class); private static final Log log = LogFactory.getLog(ReadCSV.class);
private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final String csvFile; private final String csvFile;
@ -54,18 +54,17 @@ public class ReadCSV implements Closeable {
log.info("Getting CSV file..."); log.info("Getting CSV file...");
readCSV.execute(classForName); readCSV.execute(classForName);
} }
} }
public void execute(final String classForName) throws Exception { public void execute(final String classForName)
throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException {
CSVParser csvParser = new CSVParser(); CSVParser csvParser = new CSVParser();
csvParser csvParser
.parse(csvFile, classForName, delimiter) .parse(csvFile, classForName, delimiter)
.stream() .stream()
.forEach(p -> write(p)); .forEach(this::write);
} }
@Override @Override
@ -79,18 +78,18 @@ public class ReadCSV implements Closeable {
final String fileURL, final String fileURL,
char delimiter) char delimiter)
throws Exception { throws Exception {
this.conf = new Configuration(); Configuration conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2(); HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); final FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
this.csvFile = httpConnector.getInputSource(fileURL); this.csvFile = httpConnector.getInputSource(fileURL);
this.delimiter = delimiter; this.delimiter = delimiter;
} }

View File

@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.collection.HttpConnector2;
/** /**
* Parses an excel file and writes its serialization to hdfs * Parses an excel file and writes its serialization to hdfs
*/ */
public class ReadExcel implements Closeable { public class ReadExcel implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class); private static final Log log = LogFactory.getLog(ReadExcel.class);
private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final InputStream excelFile; private final InputStream excelFile;
@ -51,13 +53,15 @@ public class ReadExcel implements Closeable {
} }
} }
public void execute(final String classForName, final String sheetName) throws Exception { public void execute(final String classForName, final String sheetName)
throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
InstantiationException {
EXCELParser excelParser = new EXCELParser(); EXCELParser excelParser = new EXCELParser();
excelParser excelParser
.parse(excelFile, classForName, sheetName) .parse(excelFile, classForName, sheetName)
.stream() .stream()
.forEach(p -> write(p)); .forEach(this::write);
} }
@Override @Override
@ -68,20 +72,20 @@ public class ReadExcel implements Closeable {
public ReadExcel( public ReadExcel(
final String hdfsPath, final String hdfsPath,
final String hdfsNameNode, final String hdfsNameNode,
final String fileURL) final String fileURL) throws CollectorException, IOException {
throws Exception {
this.conf = new Configuration(); final Configuration conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2(); HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
this.excelFile = httpConnector.getInputSourceAsStream(fileURL); this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
} }

View File

@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -74,7 +75,7 @@ public class GenerateRorActionSetJob {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString( .toString(
SparkAtomicActionJob.class GenerateRorActionSetJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -108,7 +109,7 @@ public class GenerateRorActionSetJob {
private static void processRorOrganizations(final SparkSession spark, private static void processRorOrganizations(final SparkSession spark,
final String inputPath, final String inputPath,
final String outputPath) throws Exception { final String outputPath) throws IOException {
readInputPath(spark, inputPath) readInputPath(spark, inputPath)
.map( .map(
@ -203,7 +204,7 @@ public class GenerateRorActionSetJob {
private static Dataset<RorOrganization> readInputPath( private static Dataset<RorOrganization> readInputPath(
final SparkSession spark, final SparkSession spark,
final String path) throws Exception { final String path) throws IOException {
try (final FileSystem fileSystem = FileSystem.get(new Configuration()); try (final FileSystem fileSystem = FileSystem.get(new Configuration());
final InputStream is = fileSystem.open(new Path(path))) { final InputStream is = fileSystem.open(new Path(path))) {

View File

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Address implements Serializable { public class Address implements Serializable {
private static final long serialVersionUID = 2444635485253443195L;
@JsonProperty("lat") @JsonProperty("lat")
private Float lat; private Float lat;
@ -37,8 +39,6 @@ public class Address implements Serializable {
@JsonProperty("line") @JsonProperty("line")
private String line; private String line;
private final static long serialVersionUID = 2444635485253443195L;
public Float getLat() { public Float getLat() {
return lat; return lat;
} }

View File

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Country implements Serializable { public class Country implements Serializable {
private static final long serialVersionUID = 4357848706229493627L;
@JsonProperty("country_code") @JsonProperty("country_code")
private String countryCode; private String countryCode;
@JsonProperty("country_name") @JsonProperty("country_name")
private String countryName; private String countryName;
private final static long serialVersionUID = 4357848706229493627L;
public String getCountryCode() { public String getCountryCode() {
return countryCode; return countryCode;
} }

View File

@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable {
private String preferred; private String preferred;
private final static long serialVersionUID = 2616688352998387611L; private static final long serialVersionUID = 2616688352998387611L;
public ExternalIdType() { public ExternalIdType() {
} }

View File

@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode;
public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> { public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
@Override @Override
public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException {
throws IOException, JsonProcessingException {
final ObjectCodec oc = p.getCodec(); final ObjectCodec oc = p.getCodec();
final JsonNode node = oc.readTree(p); final JsonNode node = oc.readTree(p);

View File

@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable {
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 7294958526269195673L; private static final long serialVersionUID = 7294958526269195673L;
public String getAsciiName() { public String getAsciiName() {
return asciiName; return asciiName;

View File

@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private License license; private License license;
private final static long serialVersionUID = -8389480201526252955L; private static final long serialVersionUID = -8389480201526252955L;
public NameAndCode getNutsLevel2() { public NameAndCode getNutsLevel2() {
return nutsLevel2; return nutsLevel2;

View File

@ -13,7 +13,7 @@ public class Label implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = -6576156103297850809L; private static final long serialVersionUID = -6576156103297850809L;
public String getIso639() { public String getIso639() {
return iso639; return iso639;

View File

@ -13,7 +13,7 @@ public class License implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private String license; private String license;
private final static long serialVersionUID = -194308261058176439L; private static final long serialVersionUID = -194308261058176439L;
public String getAttribution() { public String getAttribution() {
return attribution; return attribution;

View File

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class NameAndCode implements Serializable { public class NameAndCode implements Serializable {
private static final long serialVersionUID = 5459836979206140843L;
@JsonProperty("name") @JsonProperty("name")
private String name; private String name;
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 5459836979206140843L;
public String getName() { public String getName() {
return name; return name;
} }

View File

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Relationship implements Serializable { public class Relationship implements Serializable {
private static final long serialVersionUID = 7847399503395576960L;
@JsonProperty("type") @JsonProperty("type")
private String type; private String type;
@ -16,8 +18,6 @@ public class Relationship implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = 7847399503395576960L;
public String getType() { public String getType() {
return type; return type;
} }

View File

@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class RorOrganization implements Serializable { public class RorOrganization implements Serializable {
private static final long serialVersionUID = -2658312087616043225L;
@JsonProperty("ip_addresses") @JsonProperty("ip_addresses")
private List<String> ipAddresses = new ArrayList<>(); private List<String> ipAddresses = new ArrayList<>();
@ -59,8 +61,6 @@ public class RorOrganization implements Serializable {
@JsonProperty("status") @JsonProperty("status")
private String status; private String status;
private final static long serialVersionUID = -2658312087616043225L;
public List<String> getIpAddresses() { public List<String> getIpAddresses() {
return ipAddresses; return ipAddresses;
} }
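
Across the ROR model classes the serialVersionUID declarations move to the top of the class and adopt the canonical modifier order, private static final rather than private final static. The resulting shape, sketched on a hypothetical model class:

    import java.io.Serializable;

    public class RorModelSketch implements Serializable {

        // Declared first and in JLS modifier order; an explicit value keeps
        // the serialized form stable when the class is recompiled.
        private static final long serialVersionUID = 1L;

        private String name;

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }
    }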

View File

@ -11,8 +11,6 @@ import java.util.Objects;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap<String, String> implements C
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
private MessageSender messageSender; private transient MessageSender messageSender;
public AggregatorReport() { public AggregatorReport() {
} }
public AggregatorReport(MessageSender messageSender) throws IOException { public AggregatorReport(MessageSender messageSender) {
this.messageSender = messageSender; this.messageSender = messageSender;
} }
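
Marking the MessageSender transient, as was done for isLookup in ISClient above, excludes it from Java serialization: a live messaging client has no meaningful serialized form and would otherwise drag its whole object graph into the stream. A sketch, with Runnable standing in for the client type:

    import java.util.LinkedHashMap;

    public class ReportSketch extends LinkedHashMap<String, String> {

        private static final long serialVersionUID = 1L;

        // Skipped during serialization; reconstructed (or left null) on the
        // receiving side instead of being written into the stream.
        private transient Runnable messageSender;
    }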

View File

@ -22,7 +22,7 @@ public abstract class ReportingJob {
protected final AggregatorReport report; protected final AggregatorReport report;
public ReportingJob(AggregatorReport report) { protected ReportingJob(AggregatorReport report) {
this.report = report; this.report = report;
} }

View File

@ -25,7 +25,7 @@ public class MDStoreActionNode {
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
} }
public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
@ -70,7 +70,7 @@ public class MDStoreActionNode {
if (StringUtils.isBlank(hdfsuri)) { if (StringUtils.isBlank(hdfsuri)) {
throw new IllegalArgumentException("missing or empty argument namenode"); throw new IllegalArgumentException("missing or empty argument namenode");
} }
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
@ -94,7 +94,7 @@ public class MDStoreActionNode {
break; break;
} }
case ROLLBACK: { case ROLLBACK: {
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {

View File

@ -116,7 +116,7 @@ public class CollectorWorker extends ReportingJob {
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
.ofNullable(api.getParams().get("other_plugin_type")) .ofNullable(api.getParams().get("other_plugin_type"))
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
.get(); .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
switch (plugin) { switch (plugin) {
case mdstore_mongodb_dump: case mdstore_mongodb_dump:
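
Calling .get() on an Optional that may be empty throws a bare NoSuchElementException; orElseThrow makes the failure path explicit and lets it name the offending parameter. A sketch with a hypothetical parameter map and enum:

    import java.util.Map;
    import java.util.Optional;

    public class PluginLookup {

        enum OtherName {
            mdstore_mongodb_dump, mdstore_mongodb
        }

        public static OtherName resolve(Map<String, String> params) {
            return Optional
                .ofNullable(params.get("other_plugin_type"))
                .map(OtherName::valueOf)
                // An absent key now fails with a message instead of a bare
                // NoSuchElementException from Optional.get().
                .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
        }

        public static void main(String[] args) {
            System.out.println(resolve(Map.of("other_plugin_type", "mdstore_mongodb")));
        }
    }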
View File
@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob {
totalItems.add(1); totalItems.add(1);
try { try {
SAXReader reader = new SAXReader(); SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath); Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText(); final String originalIdentifier = node.getText();
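The added disallow-doctype-decl feature is the usual guard against XXE when feeding untrusted records to dom4j's SAXReader: any DOCTYPE, and with it any external entity definition, is rejected at parse time. A self-contained sketch with a made-up sample record:

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

import org.dom4j.Document;
import org.dom4j.io.SAXReader;

public class SecureSaxReaderDemo {

    public static void main(String[] args) throws Exception {
        SAXReader reader = new SAXReader();
        // Reject any DOCTYPE declaration: external entities can no longer be defined,
        // so XXE payloads fail fast with a parse exception.
        reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

        String record = "<record><id>oai:example.org:1</id></record>"; // illustrative input
        Document doc = reader.read(new ByteArrayInputStream(record.getBytes(StandardCharsets.UTF_8)));
        System.out.println(doc.getRootElement().elementText("id"));
    }
}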
View File
@ -32,7 +32,7 @@ public class HttpConnector2 {
private String responseType = null; private String responseType = null;
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector2() { public HttpConnector2() {
this(new HttpClientParams()); this(new HttpClientParams());
@ -120,7 +120,7 @@ public class HttpConnector2 {
if (is3xx(urlConn.getResponseCode())) { if (is3xx(urlConn.getResponseCode())) {
// REDIRECTS // REDIRECTS
final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.info(String.format("The requested url has been moved to %s", newUrl)); log.info("The requested url has been moved to {}", newUrl);
report report
.put( .put(
REPORT_PREFIX + urlConn.getResponseCode(), REPORT_PREFIX + urlConn.getResponseCode(),
@ -140,14 +140,14 @@ public class HttpConnector2 {
if (retryAfter > 0) { if (retryAfter > 0) {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after suggested retry-after " + retryAfter + " sec."); "{} - waiting and repeating request after suggested retry-after {} sec.", requestUrl, retryAfter);
backoffAndSleep(retryAfter * 1000); backoffAndSleep(retryAfter * 1000);
} else { } else {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after default delay of " + getClientParams().getRetryDelay() + " sec."); "{} - waiting and repeating request after default delay of {} sec.", requestUrl, getClientParams().getRetryDelay());
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
} }
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
@ -181,12 +181,12 @@ public class HttpConnector2 {
} }
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage()); log.debug("StatusCode: {}", urlConn.getResponseMessage());
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) { if (e.getKey() != null) {
for (String v : e.getValue()) { for (String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v); log.debug(" key: {} - value: {}", e.getKey(), v);
} }
} }
} }
@ -204,7 +204,7 @@ public class HttpConnector2 {
private int obtainRetryAfter(final Map<String, List<String>> headerMap) { private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (String key : headerMap.keySet()) { for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) { && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10; return Integer.parseInt(headerMap.get(key).get(0)) + 10;
} }
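The dominant pattern in this file, and in much of the commit, is swapping string concatenation in log statements for SLF4J {} placeholders, so the message is only built when the level is actually enabled. A minimal comparison sketch:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ParameterizedLoggingDemo {

    private static final Logger log = LoggerFactory.getLogger(ParameterizedLoggingDemo.class);

    public static void main(String[] args) {
        String requestUrl = "http://example.org/oai"; // illustrative value
        int retryAfter = 30;

        // Concatenation builds the string even when the level is disabled.
        log.info(requestUrl + " - retrying after " + retryAfter + " sec.");

        // Placeholders: formatting happens only if the message is actually logged.
        log.info("{} - retrying after {} sec.", requestUrl, retryAfter);
    }
}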
View File
@ -11,8 +11,6 @@ import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCollection;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
View File
@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
public static final String PATH_PARAM = "path"; public static final String PATH_PARAM = "path";
public static final String BODY_JSONPATH = "$.body"; public static final String BODY_JSONPATH = "$.body";
public FileSystem fileSystem; private final FileSystem fileSystem;
public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
this.fileSystem = fileSystem; this.fileSystem = fileSystem;
View File
@ -2,7 +2,6 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
@ -16,7 +15,6 @@ import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.OutputFormat; import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -30,7 +28,8 @@ public class OaiIterator implements Iterator<String> {
private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
private final static String REPORT_PREFIX = "oai:"; private static final String REPORT_PREFIX = "oai:";
public static final String UTF_8 = "UTF-8";
private final Queue<String> queue = new PriorityBlockingQueue<>(); private final Queue<String> queue = new PriorityBlockingQueue<>();
@ -68,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
this.token = firstPage(); this.token = firstPage();
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
} }
@ -90,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
token = otherPages(token); token = otherPages(token);
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
return res; return res;
@ -99,23 +98,24 @@ public class OaiIterator implements Iterator<String> {
@Override @Override
public void remove() { public void remove() {
throw new UnsupportedOperationException();
} }
private String firstPage() throws CollectorException { private String firstPage() throws CollectorException {
try { try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8);
if (set != null && !set.isEmpty()) { if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8"); url += "&set=" + URLEncoder.encode(set, UTF_8);
} }
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); url += "&from=" + URLEncoder.encode(fromDate, UTF_8);
} }
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); url += "&until=" + URLEncoder.encode(untilDate, UTF_8);
} }
log.info("Start harvesting using url: " + url); log.info("Start harvesting using url: {}", url);
return downloadPage(url); return downloadPage(url);
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
@ -143,7 +143,7 @@ public class OaiIterator implements Iterator<String> {
return downloadPage( return downloadPage(
baseUrl baseUrl
+ "?verb=ListRecords&resumptionToken=" + "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8")); + URLEncoder.encode(resumptionToken, UTF_8));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
report.put(e.getClass().getName(), e.getMessage()); report.put(e.getClass().getName(), e.getMessage());
throw new CollectorException(e); throw new CollectorException(e);
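Lifting the repeated "UTF-8" literal into the UTF_8 constant keeps the encoder calls consistent. For reference, Java 10+ also offers a Charset overload of URLEncoder.encode that removes the checked UnsupportedEncodingException entirely; a sketch with made-up endpoint values:

import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class OaiUrlDemo {

    public static void main(String[] args) {
        String baseUrl = "http://example.org/oai"; // illustrative endpoint
        String mdFormat = "oai_dc";
        String set = "openaire data";

        // Charset overload (Java 10+): no UnsupportedEncodingException to catch.
        String url = baseUrl
            + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, StandardCharsets.UTF_8)
            + "&set=" + URLEncoder.encode(set, StandardCharsets.UTF_8);

        System.out.println(url); // spaces become '+', reserved characters are escaped
    }
}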
View File
@ -131,7 +131,8 @@ public class RestIterator implements Iterator<String> {
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
throws TransformerConfigurationException, XPathExpressionException { throws TransformerConfigurationException, XPathExpressionException {
transformer = TransformerFactory.newInstance().newTransformer(); final TransformerFactory factory = TransformerFactory.newInstance();
transformer = factory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
xpath = XPathFactory.newInstance().newXPath(); xpath = XPathFactory.newInstance().newXPath();
@ -142,7 +143,7 @@ public class RestIterator implements Iterator<String> {
private void initQueue() { private void initQueue() {
query = baseUrl + "?" + queryParams + querySize + queryFormat; query = baseUrl + "?" + queryParams + querySize + queryFormat;
log.info("REST calls starting with " + query); log.info("REST calls starting with {}", query);
} }
private void disconnect() { private void disconnect() {
@ -174,7 +175,7 @@ public class RestIterator implements Iterator<String> {
try { try {
query = downloadPage(query); query = downloadPage(query);
} catch (CollectorException e) { } catch (CollectorException e) {
log.debug("CollectorPlugin.next()-Exception: " + e); log.debug("CollectorPlugin.next()-Exception: {}", e);
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
@ -198,7 +199,7 @@ public class RestIterator implements Iterator<String> {
// check if cursor=* is initial set otherwise add it to the queryParam URL // check if cursor=* is initial set otherwise add it to the queryParam URL
if (resumptionType.equalsIgnoreCase("deep-cursor")) { if (resumptionType.equalsIgnoreCase("deep-cursor")) {
log.debug("check resumptionType deep-cursor and check cursor=*?" + query); log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
if (!query.contains("&cursor=")) { if (!query.contains("&cursor=")) {
query += "&cursor=*"; query += "&cursor=*";
} }
@ -208,16 +209,16 @@ public class RestIterator implements Iterator<String> {
log.info("requesting URL [{}]", query); log.info("requesting URL [{}]", query);
URL qUrl = new URL(query); URL qUrl = new URL(query);
log.debug("authMethod :" + authMethod); log.debug("authMethod: {}", authMethod);
if ("bearer".equalsIgnoreCase(this.authMethod)) { if ("bearer".equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
conn.setRequestMethod("GET"); conn.setRequestMethod("GET");
theHttpInputStream = conn.getInputStream(); theHttpInputStream = conn.getInputStream();
} else if (BASIC.equalsIgnoreCase(this.authMethod)) { } else if (BASIC.equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
@ -237,13 +238,13 @@ public class RestIterator implements Iterator<String> {
if (!(emptyXml).equalsIgnoreCase(resultXml)) { if (!(emptyXml).equalsIgnoreCase(resultXml)) {
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
log.debug("nodeList.length: " + nodeList.getLength()); log.debug("nodeList.length: {}", nodeList.getLength());
for (int i = 0; i < nodeList.getLength(); i++) { for (int i = 0; i < nodeList.getLength(); i++) {
StringWriter sw = new StringWriter(); StringWriter sw = new StringWriter();
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
String toEnqueue = sw.toString(); String toEnqueue = sw.toString();
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
} else { } else {
recordQueue.add(sw.toString()); recordQueue.add(sw.toString());
} }
@ -274,9 +275,9 @@ public class RestIterator implements Iterator<String> {
String[] resumptionKeyValue = arrayUrlArgStr.split("="); String[] resumptionKeyValue = arrayUrlArgStr.split("=");
if (isInteger(resumptionKeyValue[1])) { if (isInteger(resumptionKeyValue[1])) {
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
} else { } else {
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
} }
} }
} }
@ -295,7 +296,7 @@ public class RestIterator implements Iterator<String> {
discoverResultSize += nodeList.getLength(); discoverResultSize += nodeList.getLength();
} }
} }
log.info("discoverResultSize: {}", discoverResultSize); log.info("discoverResultSize: {}", discoverResultSize);
break; break;
case "pagination": case "pagination":
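For orientation, the bearer/basic branches above differ only in the Authorization header they set before opening the stream. A condensed sketch of that logic; the URL and token are placeholders, and the demo never actually connects:

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class AuthHeaderDemo {

    // Mirrors the bearer/basic branches: only the Authorization header differs.
    static void authorize(HttpURLConnection conn, String authMethod, String authToken) {
        if ("bearer".equalsIgnoreCase(authMethod)) {
            conn.setRequestProperty("Authorization", "Bearer " + authToken);
        } else if ("basic".equalsIgnoreCase(authMethod)) {
            // the token is assumed to be the already base64-encoded user:password pair
            conn.setRequestProperty("Authorization", "Basic " + authToken);
        }
    }

    public static void main(String[] args) throws IOException {
        // openConnection() does not hit the network; the URL is a placeholder.
        HttpURLConnection conn = (HttpURLConnection) new URL("https://example.org/api").openConnection();
        authorize(conn, "bearer", "dummy-token");
        System.out.println(conn.getRequestProperty("Authorization")); // Bearer dummy-token
    }
}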
View File
@ -67,10 +67,10 @@ public class TransformSparkJobNode {
log.info("outputBasePath: {}", outputBasePath); log.info("outputBasePath: {}", outputBasePath);
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info(String.format("isLookupUrl: %s", isLookupUrl)); log.info("isLookupUrl: {}", isLookupUrl);
final String dateOfTransformation = parser.get("dateOfTransformation"); final String dateOfTransformation = parser.get("dateOfTransformation");
log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); log.info("dateOfTransformation: {}", dateOfTransformation);
final Integer rpt = Optional final Integer rpt = Optional
.ofNullable(parser.get("recordsPerTask")) .ofNullable(parser.get("recordsPerTask"))
@ -129,9 +129,9 @@ public class TransformSparkJobNode {
.map((Function<MetadataRecord, MetadataRecord>) x::call); .map((Function<MetadataRecord, MetadataRecord>) x::call);
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
log.info("Transformed item " + ct.getProcessedItems().count()); log.info("Transformed item {}", ct.getProcessedItems().count());
log.info("Total item " + ct.getTotalItems().count()); log.info("Total item {}", ct.getTotalItems().count());
log.info("Transformation Error item " + ct.getErrorItems().count()); log.info("Transformation Error item {}", ct.getErrorItems().count());
final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count();
writeHdfsFile( writeHdfsFile(
View File
@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
public class TransformationFactory { public class TransformationFactory {
private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class);
public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') "
+
"where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]";
private TransformationFactory() {
}
public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin( public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin(
final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService)
@ -27,7 +33,7 @@ public class TransformationFactory {
try { try {
final String transformationPlugin = jobArgument.get("transformationPlugin"); final String transformationPlugin = jobArgument.get("transformationPlugin");
log.info("Transformation plugin required " + transformationPlugin); log.info("Transformation plugin required {}", transformationPlugin);
switch (transformationPlugin) { switch (transformationPlugin) {
case "XSLT_TRANSFORM": { case "XSLT_TRANSFORM": {
final String transformationRuleId = jobArgument.get("transformationRuleId"); final String transformationRuleId = jobArgument.get("transformationRuleId");
@ -38,7 +44,7 @@ public class TransformationFactory {
final String transformationRule = queryTransformationRuleFromIS( final String transformationRule = queryTransformationRuleFromIS(
transformationRuleId, isLookupService); transformationRuleId, isLookupService);
final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation"));
return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation,
vocabularies); vocabularies);
@ -46,7 +52,6 @@ public class TransformationFactory {
default: default:
throw new DnetTransformationException( throw new DnetTransformationException(
"transformation plugin does not exist for " + transformationPlugin); "transformation plugin does not exist for " + transformationPlugin);
} }
} catch (Throwable e) { } catch (Throwable e) {
@ -55,9 +60,9 @@ public class TransformationFactory {
} }
private static String queryTransformationRuleFromIS(final String transformationRuleId, private static String queryTransformationRuleFromIS(final String transformationRuleId,
final ISLookUpService isLookUpService) throws Exception { final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException {
final String query = String.format(TRULE_XQUERY, transformationRuleId); final String query = String.format(TRULE_XQUERY, transformationRuleId);
System.out.println("asking query to IS: " + query); log.info("asking query to IS: {}", query);
List<String> result = isLookUpService.quickSearchProfile(query); List<String> result = isLookUpService.quickSearchProfile(query);
if (result == null || result.isEmpty()) if (result == null || result.isEmpty())
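Two recurring idioms land in this hunk: a private constructor marks TransformationFactory as a static utility that is never instantiated, and Long.parseLong replaces the deprecated new Long(String) boxing constructor. A compact illustrative sketch:

public final class UtilityClassDemo {

    // Utility class: no instances, only static members.
    private UtilityClassDemo() {
    }

    static long toEpochMillis(String value) {
        // parseLong returns a primitive and avoids the deprecated new Long(String) allocation
        return Long.parseLong(value);
    }

    public static void main(String[] args) {
        System.out.println(toEpochMillis("1628683200000"));
    }
}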
View File
@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt;
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
import java.io.Serializable; import java.io.Serializable;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
import net.sf.saxon.s9api.*; import net.sf.saxon.s9api.*;
View File
@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
private static final Set<String> particles = null; private static final Set<String> particles = null;
public PersonCleaner() {
}
private String normalize(String s) { private String normalize(String s) {
s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD
s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\(.+\\)", "");
s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\[.+\\]", "");
s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\{.+\\}", "");
s = s.replaceAll("\\s+-\\s+", "-"); s = s.replaceAll("\\s+-\\s+", "-");
// s = s.replaceAll("[\\W&&[^,-]]", " ");
// System.out.println("class Person: s: " + s);
// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " ");
s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " ");
s = s.replace("\\d", " "); s = s.replace("\\d", " ");
s = s.replace("\\n", " "); s = s.replace("\\n", " ");
@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
s = s.replaceAll("\\s+", " "); s = s.replaceAll("\\s+", " ");
if (s.contains(",")) { if (s.contains(",")) {
// System.out.println("class Person: s: " + s);
String[] arr = s.split(","); String[] arr = s.split(",");
if (arr.length == 1) { if (arr.length == 1) {
@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} else if (arr.length > 1) { } else if (arr.length > 1) {
surname = splitTerms(arr[0]); surname = splitTerms(arr[0]);
firstname = splitTermsFirstName(arr[1]); firstname = splitTermsFirstName(arr[1]);
// System.out.println("class Person: surname: " + surname);
// System.out.println("class Person: firstname: " + firstname);
fullname.addAll(surname); fullname.addAll(surname);
fullname.addAll(firstname); fullname.addAll(firstname);
} }
@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} }
if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini
firstname = fullname.subList(0, lastInitialPosition + 1); firstname = fullname.subList(0, lastInitialPosition + 1);
System.out.println("name: " + firstname);
surname = fullname.subList(lastInitialPosition + 1, fullname.size()); surname = fullname.subList(lastInitialPosition + 1, fullname.size());
} else if (hasSurnameInUpperCase) { // Case: Michele ARTINI } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI
for (String term : fullname) { for (String term : fullname) {
@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
} }
private List<String> splitTerms(String s) { private List<String> splitTerms(String s) {
if (particles == null) {
// particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
}
List<String> list = Lists.newArrayList(); List<String> list = Lists.newArrayList();
for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
// if (!particles.contains(part.toLowerCase())) {
list.add(part); list.add(part);
// }
} }
return list; return list;
} }
@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
public String getNormalisedFullname() { public String getNormalisedFullname() {
return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations())
: Joiner.on(" ").join(fullname); : Joiner.on(" ").join(fullname);
// return isAccurate() ?
// Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) :
// Joiner.on(" ").join(fullname);
} }
public List<String> getCapitalSurname() { public List<String> getCapitalSurname() {
View File
@ -1,7 +1,6 @@
package eu.dnetlib.dhp.transformation.xslt; package eu.dnetlib.dhp.transformation.xslt;
import java.io.ByteArrayInputStream;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringWriter; import java.io.StringWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*;
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable { public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; private static final String DATASOURCE_ID_PARAM = "varDataSourceId";
private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; private static final String DATASOURCE_NAME_PARAM = "varOfficialName";
private final AggregationCounter aggregationCounter; private final AggregationCounter aggregationCounter;
@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
final AggregationCounter aggregationCounter, final AggregationCounter aggregationCounter,
final String transformationRule, final String transformationRule,
long dateOfTransformation, long dateOfTransformation,
final VocabularyGroup vocabularies) final VocabularyGroup vocabularies) {
throws Exception {
this.aggregationCounter = aggregationCounter; this.aggregationCounter = aggregationCounter;
this.transformationRule = transformationRule; this.transformationRule = transformationRule;
this.vocabularies = vocabularies; this.vocabularies = vocabularies;
View File
@ -1,8 +1,6 @@
package eu.dnetlib.dhp.transformation.xslt.utils; package eu.dnetlib.dhp.transformation.xslt.utils;
// import org.apache.commons.text.WordUtils;
// import org.apache.commons.text.WordUtils;
import com.google.common.base.Function; import com.google.common.base.Function;
public class Capitalize implements Function<String, String> { public class Capitalize implements Function<String, String> {
View File
@ -1,6 +1,8 @@
package eu.dnetlib.dhp.actionmanager.bipfinder; package eu.dnetlib.dhp.actionmanager.bipfinder;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@ -67,7 +69,7 @@ public class SparkAtomicActionScoreJobTest {
} }
@Test @Test
public void matchOne() throws Exception { void matchOne() throws Exception {
String bipScoresPath = getClass() String bipScoresPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
.getPath(); .getPath();
@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Publication) aa.getPayload())); .map(aa -> ((Publication) aa.getPayload()));
Assertions.assertTrue(tmp.count() == 1); assertEquals(1, tmp.count());
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
verificationDataset.createOrReplaceTempView("publication"); verificationDataset.createOrReplaceTempView("publication");
@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest {
} }
@Test @Test
public void matchOneWithTwo() throws Exception { void matchOneWithTwo() throws Exception {
String bipScoresPath = getClass() String bipScoresPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
.getPath(); .getPath();
@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Publication) aa.getPayload())); .map(aa -> ((Publication) aa.getPayload()));
Assertions.assertTrue(tmp.count() == 1); assertEquals(1, tmp.count());
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
verificationDataset.createOrReplaceTempView("publication"); verificationDataset.createOrReplaceTempView("publication");
@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest {
List<Row> tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); List<Row> tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList();
String tmp_influence = tmp_ds.get(0).getString(0); String tmp_influence = tmp_ds.get(0).getString(0);
Assertions.assertTrue("1.47565045883e-08".equals(tmp_influence) || "1.98956540239e-08".equals(tmp_influence)); assertTrue("1.47565045883e-08".equals(tmp_influence) || "1.98956540239e-08".equals(tmp_influence));
tmp_influence = tmp_ds.get(1).getString(0); tmp_influence = tmp_ds.get(1).getString(0);
Assertions.assertTrue("1.47565045883e-08".equals(tmp_influence) || "1.98956540239e-08".equals(tmp_influence)); assertTrue("1.47565045883e-08".equals(tmp_influence) || "1.98956540239e-08".equals(tmp_influence));
Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0));
} }
@Test @Test
public void matchTwo() throws Exception { void matchTwo() throws Exception {
String bipScoresPath = getClass() String bipScoresPath = getClass()
.getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json")
.getPath(); .getPath();
@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest {
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
.map(aa -> ((Publication) aa.getPayload())); .map(aa -> ((Publication) aa.getPayload()));
Assertions.assertTrue(tmp.count() == 2); assertEquals(2, tmp.count());
Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); Dataset<Publication> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
verificationDataset.createOrReplaceTempView("publication"); verificationDataset.createOrReplaceTempView("publication");
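The test updates follow JUnit 5 conventions: package-private test classes and methods, and assertEquals(expected, actual) instead of assertTrue(a == b), which on failure reports the actual value rather than just false. A minimal sketch; the count is invented:

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;

import org.junit.jupiter.api.Test;

class AssertionStyleDemo {

    @Test
    void countMatches() {
        long count = 2L; // stand-in for tmp.count()

        // On failure this reports "expected: <2> but was: <N>"...
        assertEquals(2L, count);

        // ...where assertTrue(count == 2) would only report "expected: <true> but was: <false>".
        assertNotEquals(1L, count);
    }
}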
View File
@ -9,10 +9,10 @@ import org.junit.jupiter.api.Test;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser;
public class CSVParserTest { class CSVParserTest {
@Test @Test
public void readProgrammeTest() throws Exception { void readProgrammeTest() throws Exception {
String programmecsv = IOUtils String programmecsv = IOUtils
.toString( .toString(
View File
@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2;
public class EXCELParserTest { public class EXCELParserTest {
private static Path workingDir; private static Path workingDir;
private HttpConnector2 httpConnector = new HttpConnector2(); private final HttpConnector2 httpConnector = new HttpConnector2();
private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx";
@BeforeAll @BeforeAll
@ -30,7 +30,7 @@ public class EXCELParserTest {
} }
@Test @Test
public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException,
IllegalAccessException, InstantiationException { IllegalAccessException, InstantiationException {
EXCELParser excelParser = new EXCELParser(); EXCELParser excelParser = new EXCELParser();
View File
@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest {
} }
@Test @Test
public void numberDistinctProgrammeTest() throws Exception { void numberDistinctProgrammeTest() throws Exception {
PrepareProgramme PrepareProgramme
.main( .main(
new String[] { new String[] {
View File
@ -66,7 +66,7 @@ public class PrepareProjectTest {
} }
@Test @Test
public void numberDistinctProjectTest() throws Exception { void numberDistinctProjectTest() throws Exception {
PrepareProjects PrepareProjects
.main( .main(
new String[] { new String[] {
View File
@ -69,7 +69,7 @@ public class SparkUpdateProjectTest {
} }
@Test @Test
public void numberDistinctProgrammeTest() throws Exception { void numberDistinctProgrammeTest() throws Exception {
SparkAtomicActionJob SparkAtomicActionJob
.main( .main(
new String[] { new String[] {
View File
@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror;
import java.io.FileInputStream; import java.io.FileInputStream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest {
.readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class);
final Organization org = GenerateRorActionSetJob.convertRorOrg(r); final Organization org = GenerateRorActionSetJob.convertRorOrg(r);
System.out.println(mapper.writeValueAsString(org)); final String s = mapper.writeValueAsString(org);
Assertions.assertTrue(StringUtils.isNotBlank(s));
System.out.println(s);
} }
@Test @Test
@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest {
.readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class);
for (final RorOrganization r : arr) { for (final RorOrganization r : arr) {
GenerateRorActionSetJob.convertRorOrg(r); Organization o = GenerateRorActionSetJob.convertRorOrg(r);
Assertions.assertNotNull(o);
Assertions.assertTrue(StringUtils.isNotBlank(o.getId()));
} }
} }
View File
@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
@Test @Test
@Order(1) @Order(1)
public void testGenerateNativeStoreSparkJobRefresh() throws Exception { void testGenerateNativeStoreSparkJobRefresh() throws Exception {
MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json");
FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath()));
@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
@Test @Test
@Order(2) @Order(2)
public void testGenerateNativeStoreSparkJobIncremental() throws Exception { void testGenerateNativeStoreSparkJobIncremental() throws Exception {
MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json");
FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath()));
@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
@Test @Test
@Order(3) @Order(3)
public void testTransformSparkJob() throws Exception { void testTransformSparkJob() throws Exception {
setUpVocabulary(); setUpVocabulary();
@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
} }
@Test @Test
public void testJSONSerialization() throws Exception { void testJSONSerialization() throws Exception {
final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json"));
System.out.println("s = " + s); System.out.println("s = " + s);
final ObjectMapper mapper = new ObjectMapper(); final ObjectMapper mapper = new ObjectMapper();
@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
} }
@Test @Test
public void testGenerationMetadataRecord() throws Exception { void testGenerationMetadataRecord() throws Exception {
final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml"));
@ -236,7 +236,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest {
} }
@Test @Test
public void testEquals() throws IOException { void testEquals() throws IOException {
final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml"));
final MetadataRecord record = GenerateNativeStoreSparkJob final MetadataRecord record = GenerateNativeStoreSparkJob
View File
@ -21,7 +21,7 @@ import eu.dnetlib.dhp.collection.HttpClientParams;
* @author js, Andreas Czerniak * @author js, Andreas Czerniak
* *
*/ */
public class RestCollectorPluginTest { class RestCollectorPluginTest {
private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class);
@ -65,7 +65,7 @@ public class RestCollectorPluginTest {
@Disabled @Disabled
@Test @Test
public void test() throws CollectorException { void test() throws CollectorException {
AtomicInteger i = new AtomicInteger(0); AtomicInteger i = new AtomicInteger(0);
final Stream<String> stream = rcp.collect(api, new AggregatorReport()); final Stream<String> stream = rcp.collect(api, new AggregatorReport());
View File
@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collector.worker; package eu.dnetlib.dhp.collector.worker;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
@Disabled @Disabled
public class CollectorWorkerApplicationTests { class CollectorWorkerApplicationTests {
@Test @Test
public void testCollectionOAI() throws Exception { void testCollectionOAI() throws Exception {
final ApiDescriptor api = new ApiDescriptor(); final ApiDescriptor api = new ApiDescriptor();
api.setId("oai"); api.setId("oai");
api.setProtocol("oai"); api.setProtocol("oai");
View File
@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class TransformationJobTest extends AbstractVocabularyTest { class TransformationJobTest extends AbstractVocabularyTest {
private SparkConf sparkConf; private SparkConf sparkConf;
@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test Date cleaner") @DisplayName("Test Date cleaner")
public void testDateCleaner() throws Exception { void testDateCleaner() throws Exception {
DateCleaner dc = new DateCleaner(); DateCleaner dc = new DateCleaner();
assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("1982-09-20", dc.clean("20/09/1982"));
assertEquals("2002-09-20", dc.clean("20-09-2002")); assertEquals("2002-09-20", dc.clean("20-09-2002"));
@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") @DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator")
public void testTransformSaxonHE() throws Exception { void testTransformSaxonHE() throws Exception {
// We Set the input Record getting the XML from the classpath // We Set the input Record getting the XML from the classpath
final MetadataRecord mr = new MetadataRecord(); final MetadataRecord mr = new MetadataRecord();
@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr")
public void testTransformITGv4Zenodo() throws Exception { void testTransformITGv4Zenodo() throws Exception {
// We Set the input Record getting the XML from the classpath // We Set the input Record getting the XML from the classpath
final MetadataRecord mr = new MetadataRecord(); final MetadataRecord mr = new MetadataRecord();
@ -97,7 +97,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE")
public void testTransformMostlyUsedScript() throws Exception { void testTransformMostlyUsedScript() throws Exception {
String xslTransformationScript = ""; String xslTransformationScript = "";
xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl";
@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI")
public void testTransformRestScript() throws Exception { void testTransformRestScript() throws Exception {
String xslTransformationScript = ""; String xslTransformationScript = "";
xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl";
@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)")
public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception {
try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {
@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest {
@Test @Test
@DisplayName("Test TransformSparkJobNode.main") @DisplayName("Test TransformSparkJobNode.main")
public void transformTest(@TempDir Path testDir) throws Exception { void transformTest(@TempDir Path testDir) throws Exception {
try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) {
View File
@ -7,6 +7,7 @@ import java.io.IOException;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable {
private final DbClient dbClient; private final DbClient dbClient;
private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class);
private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " +
"target_type, unnest(original_target_objects) as target, " + "target_type, unnest(original_target_objects) as target, " +
"relationship FROM blacklist WHERE status = 'ACCEPTED'"; "relationship FROM blacklist WHERE status = 'ACCEPTED'";
@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable {
dbPassword)) { dbPassword)) {
log.info("Processing blacklist..."); log.info("Processing blacklist...");
rbl.execute(query, rbl::processBlacklistEntry); rbl.execute(QUERY, rbl::processBlacklistEntry);
} }
} }
public void execute(final String sql, final Function<ResultSet, List<Relation>> producer) throws Exception { public void execute(final String sql, final Function<ResultSet, List<Relation>> producer) {
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r));
@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable {
return Arrays.asList(direct, inverse); return Arrays.asList(direct, inverse);
} catch (final Exception e) { } catch (final SQLException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable {
public ReadBlacklistFromDB( public ReadBlacklistFromDB(
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception { throws IOException {
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.conf = new Configuration(); Configuration conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null; FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable {
try { try {
writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.write(OBJECT_MAPPER.writeValueAsString(r));
writer.newLine(); writer.newLine();
} catch (final Exception e) { } catch (final IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
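The reworked ReadBlacklistFromDB narrows catch (Exception) to the concrete checked types (SQLException, IOException) and wraps them in an unchecked exception for the lambda-based callers. A small sketch of that wrapping pattern, writing to an in-memory buffer instead of HDFS:

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringWriter;

public class NarrowCatchDemo {

    // Wraps only IOException, so unrelated programming errors still surface as themselves.
    static void writeLine(BufferedWriter writer, String json) {
        try {
            writer.write(json);
            writer.newLine();
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) throws IOException {
        StringWriter out = new StringWriter();
        try (BufferedWriter writer = new BufferedWriter(out)) {
            writeLine(writer, "{\"source\":\"a\",\"target\":\"b\",\"relationship\":\"isRelatedTo\"}");
        }
        System.out.print(out);
    }
}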
View File
@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob {
.map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> { .map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
Relation ir = c._1(); Relation ir = c._1();
Optional<Relation> obl = Optional.ofNullable(c._2()); Optional<Relation> obl = Optional.ofNullable(c._2());
if (obl.isPresent()) { if (ir.equals(obl.get())) { return null; } } if (obl.isPresent() && ir.equals(obl.get())) { return null; }
return ir; return ir;
}, Encoders.bean(Relation.class)) }, Encoders.bean(Relation.class))
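The collapsed condition above ("present and equal means drop it") can also be phrased with Optional.filter; a hedged sketch of the equivalent decision, with Relation simplified to a String:

import java.util.Optional;

public class OptionalFilterDemo {

    // Returns null when the blacklisted value matches, mirroring the map function in the job.
    static String keepUnlessBlacklisted(String relation, String blacklisted) {
        Optional<String> match = Optional.ofNullable(blacklisted).filter(relation::equals);
        return match.isPresent() ? null : relation;
    }

    public static void main(String[] args) {
        System.out.println(keepUnlessBlacklisted("cites", "cites")); // null: filtered out
        System.out.println(keepUnlessBlacklisted("cites", null)); // kept
    }
}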
View File
@ -62,7 +62,7 @@ public class BlackListTest {
} }
@Test @Test
public void noRemoveTest() throws Exception { void noRemoveTest() throws Exception {
SparkRemoveBlacklistedRelationJob SparkRemoveBlacklistedRelationJob
.main( .main(
new String[] { new String[] {
@ -89,7 +89,7 @@ public class BlackListTest {
} }
@Test @Test
public void removeNoMergeMatchTest() throws Exception { void removeNoMergeMatchTest() throws Exception {
SparkRemoveBlacklistedRelationJob SparkRemoveBlacklistedRelationJob
.main( .main(
new String[] { new String[] {
@ -128,7 +128,7 @@ public class BlackListTest {
} }
@Test @Test
public void removeMergeMatchTest() throws Exception { void removeMergeMatchTest() throws Exception {
SparkRemoveBlacklistedRelationJob SparkRemoveBlacklistedRelationJob
.main( .main(
new String[] { new String[] {
View File
@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils; import org.apache.commons.lang3.time.DateUtils;
import eu.dnetlib.broker.objects.OaBrokerAuthor;
import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerMainEntity;
import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource;
import eu.dnetlib.broker.objects.OaBrokerTypedValue;
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
public class EventFactory { public class EventFactory {
private final static String PRODUCER_ID = "OpenAIRE"; private static final String PRODUCER_ID = "OpenAIRE";
private final static String[] DATE_PATTERNS = { private static final String[] DATE_PATTERNS = {
"yyyy-MM-dd" "yyyy-MM-dd"
}; };
private EventFactory() {
}
public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) { public static Event newBrokerEvent(final UpdateInfo<?> updateInfo) {
final Event res = new Event(); final Event res = new Event();
@ -61,7 +66,7 @@ public class EventFactory {
map.setTargetResultId(target.getOpenaireId()); map.setTargetResultId(target.getOpenaireId());
final List<String> titles = target.getTitles(); final List<String> titles = target.getTitles();
if (titles.size() > 0) { if (!titles.isEmpty()) {
map.setTargetResultTitle(titles.get(0)); map.setTargetResultTitle(titles.get(0));
} }
@ -70,8 +75,12 @@ public class EventFactory {
map.setTargetDateofacceptance(date); map.setTargetDateofacceptance(date);
} }
map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); map.setTargetSubjects(target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList()));
map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); map.setTargetAuthors(target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList()));
// PROVENANCE INFO // PROVENANCE INFO
View File

@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.model.Event;
import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils;
@ -88,8 +84,7 @@ class CountAggregator extends Aggregator<Tuple2<String, Long>, Tuple2<String, Lo
@Override @Override
public Tuple2<String, Long> merge(final Tuple2<String, Long> arg0, final Tuple2<String, Long> arg1) { public Tuple2<String, Long> merge(final Tuple2<String, Long> arg0, final Tuple2<String, Long> arg1) {
final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return doMerge(arg0, arg1);
return new Tuple2<>(s, arg0._2 + arg1._2);
} }
@Override @Override
@ -99,6 +94,10 @@ class CountAggregator extends Aggregator<Tuple2<String, Long>, Tuple2<String, Lo
@Override @Override
public Tuple2<String, Long> reduce(final Tuple2<String, Long> arg0, final Tuple2<String, Long> arg1) { public Tuple2<String, Long> reduce(final Tuple2<String, Long> arg0, final Tuple2<String, Long> arg1) {
return doMerge(arg0, arg1);
}
private Tuple2<String, Long> doMerge(final Tuple2<String, Long> arg0, final Tuple2<String, Long> arg1) {
final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1);
return new Tuple2<>(s, arg0._2 + arg1._2); return new Tuple2<>(s, arg0._2 + arg1._2);
} }
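Extracting doMerge leaves merge and reduce as one-line delegates instead of duplicated bodies. A standalone sketch of the same shape outside Spark, with Map.Entry standing in for Scala's Tuple2:

import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;

import org.apache.commons.lang3.StringUtils;

public class CountAggregatorDemo {

    // Both combine operations delegate to one helper, so the logic lives in one place.
    static Entry<String, Long> reduce(Entry<String, Long> a, Entry<String, Long> b) {
        return doMerge(a, b);
    }

    static Entry<String, Long> merge(Entry<String, Long> a, Entry<String, Long> b) {
        return doMerge(a, b);
    }

    private static Entry<String, Long> doMerge(Entry<String, Long> a, Entry<String, Long> b) {
        // keep the first non-blank key, sum the counts
        final String s = StringUtils.defaultIfBlank(a.getKey(), b.getKey());
        return new SimpleEntry<>(s, a.getValue() + b.getValue());
    }

    public static void main(String[] args) {
        Entry<String, Long> merged = merge(new SimpleEntry<>("", 2L), new SimpleEntry<>("topic", 3L));
        System.out.println(merged.getKey() + "=" + merged.getValue()); // topic=5
    }
}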
Some files were not shown because too many files have changed in this diff