diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java index 10a25fdc3..a642dab70 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -8,8 +8,6 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; /** * Generates oozie properties which were not provided from commandline. @@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { }; @Override - public void execute() throws MojoExecutionException, MojoFailureException { + public void execute() { if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { String generatedSandboxName = generateSandboxName( @@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { /** * Generates sandbox name from workflow source directory. 
* - * @param wfSourceDir + * @param wfSourceDir workflow source directory * @return generated sandbox name */ private String generateSandboxName(String wfSourceDir) { // utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); + List sandboxNameParts = new ArrayList<>(); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); ArrayUtils.reverse(tokens); if (tokens.length > 0) { for (String token : tokens) { for (String limiter : limiters) { if (limiter.equals(token)) { - return sandboxNameParts.size() > 0 + return !sandboxNameParts.isEmpty() ? StringUtils.join(sandboxNameParts.toArray()) : null; } } - if (sandboxNameParts.size() > 0) { + if (!sandboxNameParts.isEmpty()) { sandboxNameParts.add(0, File.separator); } sandboxNameParts.add(0, token); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index d195ca86e..e3cdf5a22 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected List getEscapeChars(String escapeChars) { List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); + List realTokens = new ArrayList<>(); for (String token : tokens) { String realToken = getRealToken(token); 
realTokens.add(realToken); @@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { * @return content */ protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); + List names = new ArrayList<>(properties.stringPropertyNames()); Collections.sort(names); StringBuilder sb = new StringBuilder(); if (!StringUtils.isBlank(comment)) { @@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { throws MojoExecutionException { try { String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8); } catch (IOException e) { throw new MojoExecutionException("Error creating properties file", e); } @@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected static final List getListFromCSV(String csv) { if (StringUtils.isBlank(csv)) { - return new ArrayList(); + return new ArrayList<>(); } - List list = new ArrayList(); + List list = new ArrayList<>(); String[] tokens = StringUtils.split(csv, ","); for (String token : tokens) { list.add(token.trim()); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index 4bfcd3b33..2ff6bea30 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ 
-public class GenerateOoziePropertiesMojoTest { +class GenerateOoziePropertiesMojoTest { private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach - public void clearSystemProperties() { + void clearSystemProperties() { System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); } @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { + void testExecuteSandboxNameAlreadySet() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String sandboxName = "originalSandboxName"; @@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { + void testExecuteEmptyWorkflowSourceDir() throws Exception { // given String workflowSourceDir = ""; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { + void testExecuteNullSandboxNameGenerated() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecute() throws Exception { + void testExecute() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteWithoutRoot() throws Exception { + void testExecuteWithoutRoot() throws Exception { // given String workflowSourceDir = "wf/transformers"; 
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 0b3ea9653..84b962b4b 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension; /** @author mhorst, claudio.atzori */ @ExtendWith(MockitoExtension.class) -public class WritePredefinedProjectPropertiesTest { +class WritePredefinedProjectPropertiesTest { @Mock private MavenProject mavenProject; @@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest { // ----------------------------------- TESTS --------------------------------------------- @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectProperties() throws Exception { + void testExecuteWithProjectProperties() throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test() - public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { + void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void 
testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { + void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromBlankLocation() { + void testExecuteIncludingPropertyKeysFromBlankLocation() { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void 
testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { + void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { // given mojo.setQuiet(true); mojo.setIncludePropertyKeysFromFiles(new String[] { @@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidFile() { + void testExecuteIncludingPropertyKeysFromInvalidFile() { // given mojo.setIncludePropertyKeysFromFiles(new String[] { "invalid location" @@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { // given mojo.setIncludeEnvironmentVariables(true); @@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey"; String value = "systemPropertyValue"; @@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) + void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey "; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java deleted file mode 100644 index 
c53b83561..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.application; - -import java.io.*; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.Maps; - -public class ApplicationUtils { - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java index 0429bc25d..72c1f6a5e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java @@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable { final StringWriter stringWriter = new StringWriter(); IOUtils.copy(gis, stringWriter); return stringWriter.toString(); - } catch (Throwable e) { - log.error("Wrong value to decompress:" + abstractCompressed); - throw new RuntimeException(e); + } catch (IOException e) { + log.error("Wrong value to decompress: {}", abstractCompressed); + throw new IllegalArgumentException(e); } } - public static String compressArgument(final String value) throws Exception { + public static String compressArgument(final String value) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(value.getBytes()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java index 7004112e4..f34326d67 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java @@ -9,9 +9,6 @@ public class OptionsParameter { private boolean paramRequired; private boolean compressed; - 
public OptionsParameter() { - } - public String getParamName() { return paramName; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java index 12937a197..fbbbffcbb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java @@ -34,7 +34,7 @@ public class ApiDescriptor { return params; } - public void setParams(final HashMap params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad47..8fab94e92 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -12,6 +12,9 @@ public class Constants { public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + private Constants() { + } + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 44599eb83..8ceee5c8a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> 
value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable { if (oInst.isPresent()) { out .setInstance( - oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); } @@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e4417..d0909642c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + try (TarArchiveOutputStream ar = new TarArchiveOutputStream( + fileSystem.create(hdfsWritePath).getWrappedStream())) { - TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); - RemoteIterator fileStatusListIterator = fileSystem - .listFiles( - new Path(inputPath), true); + while (iterator.hasNext()) { + writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + } - while (fileStatusListIterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); } - - ar.close(); } public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index 0bc782ccb..d06544ae1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; +import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; @@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable { final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) - .map(r -> r.first()) + .map(FindIterable::first) .map(d -> d.getString("currentId")) .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); @@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable { if (!Iterables.contains(client.listDatabaseNames(), dbName)) { final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); log.warn(err); - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } return client.getDatabase(dbName); } @@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable { String.format("Missing collection '%s' in database '%s'", collName, db.getName())); log.warn(err); if (abortIfMissing) { - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } else { return null; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e02ca614..91c6c1825 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -24,7 +24,6 @@ import com.google.common.hash.Hashing; */ public class PacePerson { - private static final String UTF8 = "UTF-8"; private List name = Lists.newArrayList(); private List surname = Lists.newArrayList(); private List fullname = Lists.newArrayList(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 1f267733d..3f5c6ad4a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -5,6 +5,9 @@ import java.io.*; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; @@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable { this.deposition_id = deposition_id; } - public ZenodoAPIClient(String urlString, String access_token) throws IOException { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new 
Request.Builder() .url(bucket + "/" + file_name) - .addHeader("Content-Type", "application/zip") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .build(); @@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(body) .build(); @@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id + "/actions/newversion") - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); @@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(url) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + 
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java index c03762693..c14af55b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java @@ -32,13 +32,13 @@ public class Creator { public static Creator newInstance(String name, String affiliation, String orcid) { Creator c = new Creator(); - if (!(name == null)) { + if (name != null) { c.name = name; } - if (!(affiliation == null)) { + if (affiliation != null) { c.affiliation = affiliation; } - if (!(orcid == null)) { + if (orcid != null) { c.orcid = orcid; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java index c7428de7d..509f444b9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java @@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo; import java.io.Serializable; -import net.minidev.json.annotate.JsonIgnore; - public class File implements Serializable { private String checksum; private String filename; private long filesize; private String id; - @JsonIgnore - // private Links links; - public String getChecksum() { return checksum; } @@ -46,13 +41,4 @@ public class File implements Serializable { this.id = id; } -// @JsonIgnore -// public Links getLinks() { -// return links; -// } -// -// @JsonIgnore -// public void setLinks(Links links) { -// this.links = links; -// } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java index 98dabf56a..af6926cc7 100644 --- 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.common.rest; +import java.io.IOException; import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; @@ -23,17 +23,20 @@ public class DNetRestClient { private static final ObjectMapper mapper = new ObjectMapper(); + private DNetRestClient() { + } + public static T doGET(final String url, Class clazz) throws Exception { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet, clazz); } - public static String doGET(final String url) throws Exception { + public static String doGET(final String url) throws IOException { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet); } - public static String doPOST(final String url, V objParam) throws Exception { + public static String doPOST(final String url, V objParam) throws IOException { final HttpPost httpPost = new HttpPost(url); if (objParam != null) { @@ -45,25 +48,25 @@ public class DNetRestClient { return doHTTPRequest(httpPost); } - public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + public static T doPOST(final String url, V objParam, Class clazz) throws IOException { return mapper.readValue(doPOST(url, objParam), clazz); } - private static String doHTTPRequest(final HttpUriRequest r) throws Exception { - CloseableHttpClient client = HttpClients.createDefault(); + private static String doHTTPRequest(final HttpUriRequest r) throws IOException { + try (CloseableHttpClient client = HttpClients.createDefault()) { - log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); - log - 
.info( - "request headers: {}", - Arrays - .asList(r.getAllHeaders()) - .stream() - .map(h -> h.getName() + ":" + h.getValue()) - .collect(Collectors.joining(","))); + log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); + log + .info( + "request headers: {}", + Arrays + .asList(r.getAllHeaders()) + .stream() + .map(h -> h.getName() + ":" + h.getValue()) + .collect(Collectors.joining(","))); - CloseableHttpResponse response = client.execute(r); - return IOUtils.toString(response.getEntity().getContent()); + return IOUtils.toString(client.execute(r).getEntity().getContent()); + } } private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index a9daede8f..b3eb98d4f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -46,7 +46,7 @@ public class Vocabulary implements Serializable { } public VocabularyTerm getTerm(final String id) { - return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); + return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null); } protected void addTerm(final String id, final String name) { @@ -81,7 +81,6 @@ public class Vocabulary implements Serializable { .ofNullable(getTermBySynonym(syn)) .map(term -> getTermAsQualifier(term.getId())) .orElse(null); - // .orElse(OafMapperUtils.unknown(getId(), getName())); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index a89bb486f..d5f57849c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable { } vocs.addTerm(vocId, termId, termName); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable { .getTerms() .values() .stream() - .map(t -> t.getId()) + .map(VocabularyTerm::getId) .collect(Collectors.toCollection(HashSet::new)); } @@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable { return Optional .ofNullable(vocId) .map(String::toLowerCase) - .map(id -> vocs.containsKey(id)) + .map(vocs::containsKey) .orElse(false); } private void addSynonyms(final String vocId, final String termId, final String syn) { String id = Optional .ofNullable(vocId) - .map(s -> s.toLowerCase()) + .map(String::toLowerCase) .orElseThrow( - () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); + () -> new IllegalArgumentException( + String + .format( + "empty vocabulary id for [term:%s, synonym:%s]", termId, syn))); Optional .ofNullable(vocs.get(id)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java index f1107b4b8..c7a0b5f50 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.message; import java.io.Serializable; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -10,8 +9,8 @@ public class Message implements Serializable { private static final long serialVersionUID = 
401753881204524893L; - public static String CURRENT_PARAM = "current"; - public static String TOTAL_PARAM = "total"; + public static final String CURRENT_PARAM = "current"; + public static final String TOTAL_PARAM = "total"; private MessageType messageType; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 7a8e55a6e..aea046203 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -19,6 +18,9 @@ public class AuthorMerger { private static final Double THRESHOLD = 0.95; + private AuthorMerger() { + } + public static List merge(List> authors) { authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); @@ -36,7 +38,8 @@ public class AuthorMerger { public static List mergeAuthor(final List a, final List b, Double threshold) { int pa = countAuthorsPids(a); int pb = countAuthorsPids(b); - List base, enrich; + List base; + List enrich; int sa = authorsSize(a); int sb = authorsSize(b); @@ -62,7 +65,7 @@ public class AuthorMerger { // (if an Author has more than 1 pid, it appears 2 times in the list) final Map basePidAuthorMap = base .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -74,7 +77,7 @@ public class AuthorMerger { // (list of pid that are missing in the other list) final List> pidToEnrich = enrich .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -117,9 +120,9 @@ public class AuthorMerger { } public static 
String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + final String classid = pid.getQualifier() != null && pid.getQualifier().getClassid() != null + ? pid.getQualifier().getClassid().toLowerCase() : ""; + return classid + (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java index 9ac0a0bf7..fd4c0191a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java @@ -12,6 +12,9 @@ import com.ximpleware.VTDNav; /** Created by sandro on 9/29/16. */ public class VtdUtilityParser { + private VtdUtilityParser() { + } + public static List getTextValuesWithAttributes( final AutoPilot ap, final VTDNav vn, final String xpath, final List attributes) throws VtdException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 1d002ed7e..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { r .getAuthor() .stream() - .filter(a -> Objects.nonNull(a)) + .filter(Objects::nonNull) .filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .collect(Collectors.toList())); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index c6a8fd5a7..720fe47fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*; public class OafMapperUtils { + private OafMapperUtils() { + } + public static Oaf merge(final Oaf left, final Oaf right) { if (ModelSupport.isSubClass(left, OafEntity.class)) { return mergeEntities((OafEntity) left, (OafEntity) right); } else if (ModelSupport.isSubClass(left, Relation.class)) { ((Relation) left).mergeFrom((Relation) right); } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName()); } return left; } @@ -38,7 +41,7 @@ public class OafMapperUtils { } else if (ModelSupport.isSubClass(left, Project.class)) { left.mergeFrom(right); } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); } return left; } @@ -62,7 +65,7 @@ public class OafMapperUtils { public static List listKeyValues(final String... 
s) { if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)"); } final List list = new ArrayList<>(); @@ -88,7 +91,7 @@ public class OafMapperUtils { .stream(values) .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -97,7 +100,7 @@ public class OafMapperUtils { .stream() .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -342,10 +345,10 @@ public class OafMapperUtils { if (instanceList != null) { final Optional min = instanceList .stream() - .map(i -> i.getAccessright()) + .map(Instance::getAccessright) .min(new AccessRightComparator<>()); - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new); if (StringUtils.isBlank(rights.getClassid())) { rights.setClassid(UNKNOWN); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 8d760a2cd..6a86f30df 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -34,6 +34,9 @@ public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); + private DHPUtils() { + } + public static Seq toSeq(List list) { return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); } @@ -44,7 +47,7 @@ public class DHPUtils { md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { - System.err.println("Error creating id"); + log.error("Error creating id from {}", s, e); 
return null; } } @@ -53,33 +56,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String compressString(final String input) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - Base64OutputStream b64os = new Base64OutputStream(out)) { - GZIPOutputStream gzip = new GZIPOutputStream(b64os); - gzip.write(input.getBytes(StandardCharsets.UTF_8)); - gzip.close(); - return out.toString(); - } catch (Throwable e) { - return null; - } - } - - public static String decompressString(final String input) { - byte[] byteArray = Base64.decodeBase64(input.getBytes()); - int len; - try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); - ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { - byte[] buffer = new byte[1024]; - while ((len = gis.read(buffer)) != -1) { - bos.write(buffer, 0, len); - } - return bos.toString(); - } catch (Exception e) { - return null; - } - } - public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index b326c4159..8ae0bb5c3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -18,13 +18,16 @@ public class ISLookupClientFactory { private static final int requestTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10; + private ISLookupClientFactory() { + } + public static ISLookUpService getLookUpService(final String isLookupUrl) { return getServiceStub(ISLookUpService.class, isLookupUrl); } @SuppressWarnings("unchecked") private static T getServiceStub(final Class clazz, final String endpoint) { - log.info(String.format("creating %s stub from %s", 
clazz.getName(), endpoint)); + log.info("creating {} stub from {}", clazz.getName(), endpoint); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setAddress(endpoint); @@ -38,12 +41,10 @@ public class ISLookupClientFactory { log .info( - String - .format( - "setting connectTimeout to %s, requestTimeout to %s for service %s", - connectTimeout, - requestTimeout, - clazz.getCanonicalName())); + "setting connectTimeout to {}, requestTimeout to {} for service {}", + connectTimeout, + requestTimeout, + clazz.getCanonicalName()); policy.setConnectionTimeout(connectTimeout); policy.setReceiveTimeout(requestTimeout); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java index 9b00b908c..81f1b5142 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java @@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException; public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { - public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; + public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public abstract String getName(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java index c7e311b02..1ea2b9f46 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java @@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] 
arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(""); } final Item item = arguments[0].head(); @@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction { for (String format : dateFormats) { try { c.setTime(new SimpleDateFormat(format).parse(s)); - String year = String.valueOf(c.get(Calendar.YEAR)); - return year; + return String.valueOf(c.get(Calendar.YEAR)); } catch (ParseException e) { } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 1b5f3c40d..3e5def9b5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(BLANK); } String s = arguments[0].head().getStringValue(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java index 46ecafd0a..b46a415d8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.utils.saxon; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import org.apache.commons.lang3.StringUtils; import net.sf.saxon.expr.XPathContext; @@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction { final String s1 = getValue(arguments[0]); final String s2 = getValue(arguments[1]); - return new StringValue(StringUtils.isNotBlank(s1) ? 
s1 : StringUtils.isNotBlank(s2) ? s2 : ""); + final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : ""; + return new StringValue(value); } private String getValue(final Sequence arg) throws XPathException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java index b85d866f1..61049d2e1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java @@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl; public class SaxonTransformerFactory { + private SaxonTransformerFactory() { + } + /** * Creates the index record transformer from the given XSLT * diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java index e14020830..1788239f2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java @@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -public class ArgumentApplicationParserTest { +class ArgumentApplicationParserTest { @Test - public void testParseParameter() throws Exception { + void testParseParameter() throws Exception { final String jsonConfiguration = IOUtils .toString( this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java index 870943816..fa721d5e5 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java +++ 
b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java @@ -21,13 +21,13 @@ public class HdfsSupportTest { class Remove { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); } @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { + void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { // when HdfsSupport.remove(tempDir.toString(), new Configuration()); @@ -36,7 +36,7 @@ public class HdfsSupportTest { } @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { + void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { // given Path file = Files.createTempFile(tempDir, "p", "s"); @@ -52,13 +52,13 @@ public class HdfsSupportTest { class ListFiles { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); } @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { + void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java index 5ebd7213e..cb9ae2886 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; -public class PacePersonTest { +class PacePersonTest { @Test - public void pacePersonTest1() { + void pacePersonTest1() { PacePerson p = new PacePerson("Artini, Michele", 
false); assertEquals("Artini", p.getSurnameString()); @@ -17,7 +17,7 @@ public class PacePersonTest { } @Test - public void pacePersonTest2() { + void pacePersonTest2() { PacePerson p = new PacePerson("Michele G. Artini", false); assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Michele G", p.getNameString()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java index 2f01c0863..8fa966c2f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java @@ -18,7 +18,8 @@ public class SparkSessionSupportTest { class RunWithSparkSession { @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); @@ -37,7 +38,8 @@ public class SparkSessionSupportTest { } @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 9ae9c33c2..2ccaed3e4 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @Disabled -public class ZenodoAPIClientTest { +class ZenodoAPIClientTest { private final String URL_STRING = 
"https://sandbox.zenodo.org/api/deposit/depositions"; private final String ACCESS_TOKEN = ""; @@ -22,7 +22,7 @@ public class ZenodoAPIClientTest { private final String depositionId = "674915"; @Test - public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { + void testUploadOldDeposition() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); @@ -44,7 +44,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -67,7 +67,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionNewName() throws IOException, MissingConceptDoiException { + void testNewVersionNewName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -87,7 +87,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionOldName() throws IOException, MissingConceptDoiException { + void testNewVersionOldName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java index 9c4e62214..3a7a41a1b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class AuthorMergerTest { +class AuthorMergerTest { private String publicationsBasePath; @@ -43,7 +43,7 @@ public class AuthorMergerTest { } @Test - public 
void mergeTest() { // used in the dedup: threshold set to 0.95 + void mergeTest() { // used in the dedup: threshold set to 0.95 for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8d519a93f..4068f0abb 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import me.xuender.unidecode.Unidecode; -public class OafMapperUtilsTest { +class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -42,7 +42,7 @@ public class OafMapperUtilsTest { } @Test - public void testDateValidation() { + void testDateValidation() { assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); @@ -147,44 +147,46 @@ public class OafMapperUtilsTest { } @Test - public void testDate() { - System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); + void testDate() { + final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); + assertNotNull(date); + System.out.println(date); } @Test - public void testMergePubs() throws IOException { + void testMergePubs() throws IOException { Publication p1 = read("publication_1.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class); Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class); - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), 
ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); + assertEquals(1, p1.getCollectedfrom().size()); + assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey()); + assertEquals(1, d2.getCollectedfrom().size()); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p1, d2) .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + .getClassid()); - assertEquals(p2.getCollectedfrom().size(), 1); + assertEquals(1, p2.getCollectedfrom().size()); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); + assertEquals(1, d1.getCollectedfrom().size()); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.DATASET_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p2, d1) .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + .getClassid()); } protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } protected T read(String filename, Class clazz) throws IOException { diff --git a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java index d1d1ada71..5743b0831 100644 --- a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java @@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation; import org.junit.jupiter.api.Test; -public class RelationMapperTest { +class 
RelationMapperTest { @Test - public void testLoadRels() throws Exception { + void testLoadRels() throws Exception { RelationMapper relationMapper = RelationMapper.load(); relationMapper.keySet().forEach(System.out::println); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 5a80c0b53..088e618c7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.*; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import eu.dnetlib.actionmanager.rmi.ActionManagerException; -import eu.dnetlib.actionmanager.set.ActionManagerSet; -import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes; -import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; public class ISClient implements Serializable { - private static final Logger log = 
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); + private static final Logger log = LoggerFactory.getLogger(ISClient.class); private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private final ISLookUpService isLookup; + private final transient ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -63,7 +60,7 @@ public class ISClient implements Serializable { .map( sets -> sets .stream() - .map(set -> parseSetInfo(set)) + .map(ISClient::parseSetInfo) .filter(t -> ids.contains(t.getLeft())) .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) @@ -73,15 +70,17 @@ public class ISClient implements Serializable { } } - private Triple parseSetInfo(String set) { + private static Triple parseSetInfo(String set) { try { - Document doc = new SAXReader().read(new StringReader(set)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Document doc = reader.read(new StringReader(set)); return Triple .of( doc.valueOf("//SET/@id"), doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@latest")); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { throw new IllegalStateException(e); } } @@ -99,7 +98,7 @@ public class ISClient implements Serializable { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; - log.debug("quering for service property: " + q); + log.debug("querying for service property: {}", q); try { final List value = isLookup.quickSearchProfile(q); return Iterables.getOnlyElement(value); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java 
index fbb072957..eccfa445c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -62,6 +62,7 @@ public class MergeAndGet { x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); } + @SuppressWarnings("unchecked") private static G selectNewerAndGet(G x, A y) { if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7893fcf8b..c5f252c97 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); + @SuppressWarnings("unchecked") Class actionPayloadClazz = (Class) Class.forName(actionPayloadClassName); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); @@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob { return spark .read() .parquet(path) - .map((MapFunction) value -> extractPayload(value), Encoders.STRING()) + .map((MapFunction) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING()) .map( (MapFunction) value -> decodePayload(actionPayloadClazz, value), Encoders.bean(actionPayloadClazz)); diff --git 
a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index f51c697f4..62eec13d5 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest { private ISClient isClient; @Test - public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { + void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { // given Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path outputDir = workingDir.resolve("output"); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index b2248d77a..4c88e9de3 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -20,7 +20,7 @@ public class MergeAndGetTest { class MergeFromAndGetStrategy { @Test - public void shouldThrowForOafAndOaf() { + void shouldThrowForOafAndOaf() { // given Oaf a = mock(Oaf.class); Oaf b = mock(Oaf.class); @@ -33,7 +33,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndRelation() { + void shouldThrowForOafAndRelation() { // given Oaf a = mock(Oaf.class); Relation b = mock(Relation.class); @@ -46,7 +46,7 @@ public class MergeAndGetTest { } @Test - public void 
shouldThrowForOafAndOafEntity() { + void shouldThrowForOafAndOafEntity() { // given Oaf a = mock(Oaf.class); OafEntity b = mock(OafEntity.class); @@ -59,7 +59,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOaf() { + void shouldThrowForRelationAndOaf() { // given Relation a = mock(Relation.class); Oaf b = mock(Oaf.class); @@ -72,7 +72,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -85,7 +85,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForRelationAndRelation() { + void shouldBehaveProperlyForRelationAndRelation() { // given Relation a = mock(Relation.class); Relation b = mock(Relation.class); @@ -101,7 +101,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOaf() { + void shouldThrowForOafEntityAndOaf() { // given OafEntity a = mock(OafEntity.class); Oaf b = mock(Oaf.class); @@ -114,7 +114,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -127,7 +127,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { + void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { // given class OafEntitySub1 extends OafEntity { } @@ -145,7 +145,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForOafEntityAndOafEntity() { + void shouldBehaveProperlyForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); OafEntity b = mock(OafEntity.class); @@ -165,7 +165,7 @@ public class MergeAndGetTest { class SelectNewerAndGetStrategy { @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { 
// given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -178,7 +178,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -191,7 +191,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndResult() { + void shouldThrowForOafEntityAndResult() { // given OafEntity a = mock(OafEntity.class); Result b = mock(Result.class); @@ -204,7 +204,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { + void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { // given // real types must be used because subclass-superclass resolution does not work for // mocks @@ -221,7 +221,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnLeftForOafEntityAndOafEntity() { + void shouldShouldReturnLeftForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(1L); @@ -238,7 +238,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnRightForOafEntityAndOafEntity() { + void shouldShouldReturnRightForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(2L); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index 79ab55e07..99ce961aa 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -77,7 +77,7 @@ 
public class PromoteActionPayloadForGraphTableJobTest { class Main { @Test - public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { + void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { // given Class rowClazz = Relation.class; Class actionPayloadClazz = OafEntity.class; @@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest { @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") - public void shouldPromoteActionPayloadForGraphTable( + void shouldPromoteActionPayloadForGraphTable( MergeAndGet.Strategy strategy, Class rowClazz, Class actionPayloadClazz) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 477e4b204..cbc1bfaba 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest { class JoinTableWithActionPayloadAndMerge { @Test - public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { + void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { // given class OafImpl extends Oaf { } @@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { + void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { // given String id0 = "id0"; String id1 = "id1"; @@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void 
shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { + void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { // given String id0 = "id0"; String id1 = "id1"; @@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest { class GroupTableByIdAndMerge { @Test - public void shouldRunProperly() { + void shouldRunProperly() { // given String id1 = "id1"; String id2 = "id2"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java index 4b9fd33f4..a48b84a33 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class CollectAndSave implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); + Objects + .requireNonNull( + CollectAndSave.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index cea8c2891..f178451c1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable { private static void prepareResults(SparkSession spark, String inputPath, String outputPath, String bipScorePath, Class inputClazz) { - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD bipDeserializeJavaRDD = sc .textFile(bipScorePath) @@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - System.out.println(bipScores.count()); - Dataset results = readPath(spark, inputPath, inputClazz); results.createOrReplaceTempView("result"); @@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable { ret.setId(value._2().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) value -> value.getId(), Encoders.STRING()) + .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = new Result(); ret.setDataInfo(getDataInfo()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 9e852eb77..40cf5ee53 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -171,26 +171,23 @@ public class PrepareProgramme { } private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { - if (!a.getLanguage().equals("en")) { - if (b.getLanguage().equalsIgnoreCase("en")) { - a.setTitle(b.getTitle()); - a.setLanguage(b.getLanguage()); - } + if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) { + a.setTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); } - if (StringUtils.isEmpty(a.getShortTitle())) { - if (!StringUtils.isEmpty(b.getShortTitle())) { - a.setShortTitle(b.getShortTitle()); - } + if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) { + a.setShortTitle(b.getShortTitle()); } return a; } + @SuppressWarnings("unchecked") private static List prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map( value -> new Tuple2<>(value.getCode(), - new Tuple2(value.getTitle(), value.getShortTitle()))) + new Tuple2<>(value.getTitle(), value.getShortTitle()))) .collect() .toArray(); @@ -216,7 +213,7 @@ public class PrepareProgramme { String[] tmp = ent.split("\\."); if (tmp.length <= 2) { if (StringUtils.isEmpty(entry._2()._2())) { - map.put(entry._1(), new Tuple2(entry._2()._1(), entry._2()._1())); + map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1())); } else { map.put(entry._1(), entry._2()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index cecd537ba..b1a381415 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -29,7 +29,7 @@ import scala.Tuple2; */ public class PrepareProjects { - private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 2bba9fb60..2cc20cb15 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient; */ public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + + private static final String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; + + private final DbClient dbClient; private final Configuration conf; private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { - final Consumer consumer = rs -> 
producer.apply(rs).forEach(r -> writeProject(r)); + final Consumer consumer = rs -> producer.apply(rs).forEach(this::writeProject); dbClient.processResults(sql, consumer); } @@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable { public ReadProjectsFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index fdc12c662..a4a0bf6a4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Programme; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -47,13 +48,10 @@ import 
scala.Tuple2; * * To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more * than one programme. - * - * */ public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -137,7 +135,6 @@ public class SparkAtomicActionJob { h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); - // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); return pp; @@ -152,20 +149,16 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); - if (op.isPresent()) { - rp.setH2020topicdescription(op.get().getTitle()); - } + op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); return rp; }, Encoders.bean(Project.class)) .filter(Objects::nonNull) .groupByKey( - (MapFunction) p -> p.getId(), + (MapFunction) OafEntity::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (s, it) -> { Project first = it.next(); - it.forEachRemaining(p -> { - first.mergeFrom(p); - }); + it.forEachRemaining(first::mergeFrom); return first; }, Encoders.bean(Project.class)) .toJavaRDD() @@ -189,12 +182,6 @@ public class SparkAtomicActionJob { h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); } -// private static void setProgramme(H2020Classification h2020Classification, String classification) { -// String[] tmp = classification.split(" \\| "); -// -// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); -// } - 
public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 1d839bec5..c53cd2127 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -32,12 +32,14 @@ public class CSVParser { final Set headers = parser.getHeaderMap().keySet(); Class clazz = Class.forName(classForName); for (CSVRecord csvRecord : parser.getRecords()) { - final Object cc = clazz.newInstance(); + + @SuppressWarnings("unchecked") + final R cc = (R) clazz.newInstance(); for (String header : headers) { FieldUtils.writeField(cc, header, csvRecord.get(header), true); } - ret.add((R) cc); + ret.add(cc); } return ret; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5f5b61d8b..5ce730692 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -26,52 +26,52 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - OPCPackage pkg = OPCPackage.open(file); - XSSFWorkbook wb = new XSSFWorkbook(pkg); + try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) { - XSSFSheet sheet = wb.getSheet(sheetName); - - if (sheetName == null) { - throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); - 
} - - List ret = new ArrayList<>(); - - DataFormatter dataFormatter = new DataFormatter(); - Iterator rowIterator = sheet.rowIterator(); - List headers = new ArrayList<>(); - int count = 0; - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (count == 0) { - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - headers.add(dataFormatter.formatCellValue(cell)); - } - } else { - Class clazz = Class.forName(classForName); - final Object cc = clazz.newInstance(); - - for (int i = 0; i < headers.size(); i++) { - Cell cell = row.getCell(i); - FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); - - } - - EXCELTopic et = (EXCELTopic) cc; - if (StringUtils.isNotBlank(et.getRcn())) { - ret.add((R) cc); - } + XSSFSheet sheet = wb.getSheet(sheetName); + if (sheetName == null) { + throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } - count += 1; - } + List ret = new ArrayList<>(); - return ret; + DataFormatter dataFormatter = new DataFormatter(); + Iterator rowIterator = sheet.rowIterator(); + List headers = new ArrayList<>(); + int count = 0; + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + if (count == 0) { + Iterator cellIterator = row.cellIterator(); + + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + headers.add(dataFormatter.formatCellValue(cell)); + } + } else { + Class clazz = Class.forName(classForName); + final Object cc = clazz.newInstance(); + + for (int i = 0; i < headers.size(); i++) { + Cell cell = row.getCell(i); + FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); + + } + + EXCELTopic et = (EXCELTopic) cc; + if (StringUtils.isNotBlank(et.getRcn())) { + ret.add((R) cc); + } + + } + + count += 1; + } + + return ret; + } } } diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index f9118350f..1ae775bec 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -26,7 +26,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; */ public class ReadCSV implements Closeable { private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; @@ -54,18 +54,17 @@ public class ReadCSV implements Closeable { log.info("Getting CSV file..."); readCSV.execute(classForName); - } } - public void execute(final String classForName) throws Exception { + public void execute(final String classForName) + throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { CSVParser csvParser = new CSVParser(); csvParser .parse(csvFile, classForName, delimiter) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -79,18 +78,18 @@ public class ReadCSV implements Closeable { final String fileURL, char delimiter) throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + final 
FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); this.delimiter = delimiter; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index a13d9b791..359e46fc7 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs */ public class ReadExcel implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private static final Log log = LogFactory.getLog(ReadExcel.class); + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final InputStream excelFile; @@ -51,13 +53,15 @@ public class ReadExcel implements Closeable { } } - public void execute(final String classForName, final String sheetName) throws Exception { + public void execute(final String classForName, final String sheetName) + throws IOException, 
ClassNotFoundException, InvalidFormatException, IllegalAccessException, + InstantiationException { + EXCELParser excelParser = new EXCELParser(); excelParser .parse(excelFile, classForName, sheetName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -68,20 +72,20 @@ public class ReadExcel implements Closeable { public ReadExcel( final String hdfsPath, final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + final String fileURL) throws CollectorException, IOException { + + final Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.excelFile = httpConnector.getInputSourceAsStream(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index d6c17e415..869e1cb68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static 
eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; @@ -74,7 +75,7 @@ public class GenerateRorActionSetJob { final String jsonConfiguration = IOUtils .toString( - SparkAtomicActionJob.class + GenerateRorActionSetJob.class .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -108,7 +109,7 @@ public class GenerateRorActionSetJob { private static void processRorOrganizations(final SparkSession spark, final String inputPath, - final String outputPath) throws Exception { + final String outputPath) throws IOException { readInputPath(spark, inputPath) .map( @@ -203,7 +204,7 @@ public class GenerateRorActionSetJob { private static Dataset readInputPath( final SparkSession spark, - final String path) throws Exception { + final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java index b566a5501..a5acea5ae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Address implements Serializable { + private static final long serialVersionUID = 2444635485253443195L; + @JsonProperty("lat") private Float lat; @@ -37,8 +39,6 @@ public class Address implements Serializable { @JsonProperty("line") private String line; - 
private final static long serialVersionUID = 2444635485253443195L; - public Float getLat() { return lat; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java index 3dab60a9f..1e7621f98 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Country implements Serializable { + private static final long serialVersionUID = 4357848706229493627L; + @JsonProperty("country_code") private String countryCode; @JsonProperty("country_name") private String countryName; - private final static long serialVersionUID = 4357848706229493627L; - public String getCountryCode() { return countryCode; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java index 406bfd82c..5ea419b4e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java @@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable { private String preferred; - private final static long serialVersionUID = 2616688352998387611L; + private static final long serialVersionUID = 2616688352998387611L; public ExternalIdType() { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java index 3fd0c9250..a744a325f 100644 
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java @@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode; public class ExternalIdTypeDeserializer extends JsonDeserializer { @Override - public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) - throws IOException, JsonProcessingException { + public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { final ObjectCodec oc = p.getCodec(); final JsonNode node = oc.readTree(p); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java index 9616db447..9317a777c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java @@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable { @JsonProperty("code") private String code; - private final static long serialVersionUID = 7294958526269195673L; + private static final long serialVersionUID = 7294958526269195673L; public String getAsciiName() { return asciiName; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java index 2b0487168..b13d64b10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java @@ -31,7 +31,7 @@ public class GeonamesCity 
implements Serializable { @JsonProperty("license") private License license; - private final static long serialVersionUID = -8389480201526252955L; + private static final long serialVersionUID = -8389480201526252955L; public NameAndCode getNutsLevel2() { return nutsLevel2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java index 61eb0339d..9a2cb39e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java @@ -13,7 +13,7 @@ public class Label implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = -6576156103297850809L; + private static final long serialVersionUID = -6576156103297850809L; public String getIso639() { return iso639; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java index bdc8f4c42..a0f6cf774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java @@ -13,7 +13,7 @@ public class License implements Serializable { @JsonProperty("license") private String license; - private final static long serialVersionUID = -194308261058176439L; + private static final long serialVersionUID = -194308261058176439L; public String getAttribution() { return attribution; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java index 61d7eb8e6..c0f5d7645 100644 
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class NameAndCode implements Serializable { + private static final long serialVersionUID = 5459836979206140843L; + @JsonProperty("name") private String name; @JsonProperty("code") private String code; - private final static long serialVersionUID = 5459836979206140843L; - public String getName() { return name; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java index 8b73db98f..db9f96445 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Relationship implements Serializable { + private static final long serialVersionUID = 7847399503395576960L; + @JsonProperty("type") private String type; @@ -16,8 +18,6 @@ public class Relationship implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = 7847399503395576960L; - public String getType() { return type; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java index 94de34fee..b8041cfdf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class RorOrganization implements Serializable { + private static final long serialVersionUID = -2658312087616043225L; + @JsonProperty("ip_addresses") private List ipAddresses = new ArrayList<>(); @@ -59,8 +61,6 @@ public class RorOrganization implements Serializable { @JsonProperty("status") private String status; - private final static long serialVersionUID = -2658312087616043225L; - public List getIpAddresses() { return ipAddresses; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java index c822a6723..8e46ab92b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java @@ -11,8 +11,6 @@ import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.utils.DHPUtils; @@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap implements C private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); - private MessageSender messageSender; + private transient MessageSender messageSender; public AggregatorReport() { } - public AggregatorReport(MessageSender messageSender) throws IOException { + public AggregatorReport(MessageSender messageSender) { this.messageSender = messageSender; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 9926f1688..549169673 
100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -22,7 +22,7 @@ public abstract class ReportingJob { protected final AggregatorReport report; - public ReportingJob(AggregatorReport report) { + protected ReportingJob(AggregatorReport report) { this.report = report; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 65e7805d8..bab44a3b1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -25,7 +25,7 @@ public class MDStoreActionNode { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK } - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; @@ -70,7 +70,7 @@ public class MDStoreActionNode { if (StringUtils.isBlank(hdfsuri)) { throw new IllegalArgumentException("missing or empty argument namenode"); } - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { @@ -94,7 +94,7 @@ public class MDStoreActionNode { break; } case ROLLBACK: { - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = 
argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 23ee3e2c6..d0872da1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -116,7 +116,7 @@ public class CollectorWorker extends ReportingJob { final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) - .get(); + .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index f6fdc266e..f1f74b09e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob { totalItems.add(1); try { SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Node node = document.selectSingleNode(xpath); final String originalIdentifier = node.getText(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a61e2032c..8493a3436 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -32,7 +32,7 @@ public class HttpConnector2 { private String responseType = null; - private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; public HttpConnector2() { this(new HttpClientParams()); @@ -120,7 +120,7 @@ public class HttpConnector2 { if (is3xx(urlConn.getResponseCode())) { // REDIRECTS final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.info(String.format("The requested url has been moved to %s", newUrl)); + log.info("The requested url has been moved to {}", newUrl); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -140,14 +140,14 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - requestUrl + " - waiting and repeating request after suggested retry-after " - + retryAfter + " sec."); + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); backoffAndSleep(retryAfter * 1000); } else { log .warn( - requestUrl + " - waiting and repeating request after default delay of " - + getClientParams().getRetryDelay() + " sec."); + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); @@ -181,12 +181,12 @@ public class HttpConnector2 { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", 
urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} - value: {}", e.getKey(), v); } } } @@ -204,7 +204,7 @@ public class HttpConnector2 { private int obtainRetryAfter(final Map> headerMap) { for (String key : headerMap.keySet()) { - if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index a27314983..549c59720 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -11,8 +11,6 @@ import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; import com.mongodb.client.MongoCollection; import eu.dnetlib.dhp.aggregation.common.AggregatorReport; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index 3199af5b7..ec5bab448 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public static final String PATH_PARAM = "path"; public static final String BODY_JSONPATH = "$.body"; - public FileSystem fileSystem; + private final FileSystem fileSystem; public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { this.fileSystem = fileSystem; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 75dd746ea..331dee6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.io.IOException; -import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -16,7 +15,6 @@ import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +28,8 @@ public class OaiIterator implements Iterator { private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); - private final static String REPORT_PREFIX = "oai:"; + private static final String REPORT_PREFIX = "oai:"; + public static final String UTF_8 = "UTF-8"; private final Queue queue = new PriorityBlockingQueue<>(); @@ -68,7 +67,7 @@ public class OaiIterator implements Iterator { try { this.token = firstPage(); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } @@ -90,7 +89,7 @@ public 
class OaiIterator implements Iterator { try { token = otherPages(token); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } return res; @@ -99,23 +98,24 @@ public class OaiIterator implements Iterator { @Override public void remove() { + throw new UnsupportedOperationException(); } private String firstPage() throws CollectorException { try { - String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8); if (set != null && !set.isEmpty()) { - url += "&set=" + URLEncoder.encode(set, "UTF-8"); + url += "&set=" + URLEncoder.encode(set, UTF_8); } if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); + url += "&from=" + URLEncoder.encode(fromDate, UTF_8); } if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); + url += "&until=" + URLEncoder.encode(untilDate, UTF_8); } - log.info("Start harvesting using url: " + url); + log.info("Start harvesting using url: {}", url); return downloadPage(url); } catch (final UnsupportedEncodingException e) { @@ -143,7 +143,7 @@ public class OaiIterator implements Iterator { return downloadPage( baseUrl + "?verb=ListRecords&resumptionToken=" - + URLEncoder.encode(resumptionToken, "UTF-8")); + + URLEncoder.encode(resumptionToken, UTF_8)); } catch (final UnsupportedEncodingException e) { report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 764c21fc2..a90d259b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -131,7 +131,8 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { - transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); xpath = XPathFactory.newInstance().newXPath(); @@ -142,7 +143,7 @@ public class RestIterator implements Iterator { private void initQueue() { query = baseUrl + "?" + queryParams + querySize + queryFormat; - log.info("REST calls starting with " + query); + log.info("REST calls starting with {}", query); } private void disconnect() { @@ -174,7 +175,7 @@ public class RestIterator implements Iterator { try { query = downloadPage(query); } catch (CollectorException e) { - log.debug("CollectorPlugin.next()-Exception: " + e); + log.debug("CollectorPlugin.next()-Exception: {}", e); throw new RuntimeException(e); } } @@ -198,7 +199,7 @@ public class RestIterator implements Iterator { // check if cursor=* is initial set otherwise add it to the queryParam URL if (resumptionType.equalsIgnoreCase("deep-cursor")) { - log.debug("check resumptionType deep-cursor and check cursor=*?" 
+ query); + log.debug("check resumptionType deep-cursor and check cursor=*?{}", query); if (!query.contains("&cursor=")) { query += "&cursor=*"; } @@ -208,16 +209,16 @@ public class RestIterator implements Iterator { log.info("requestig URL [{}]", query); URL qUrl = new URL(query); - log.debug("authMethod :" + authMethod); + log.debug("authMethod: {}", authMethod); if ("bearer".equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestMethod("GET"); theHttpInputStream = conn.getInputStream(); } else if (BASIC.equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); @@ -237,13 +238,13 @@ public class RestIterator implements Iterator { if (!(emptyXml).equalsIgnoreCase(resultXml)) { resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); - log.debug("nodeList.length: " + nodeList.getLength()); + log.debug("nodeList.length: {}", nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { StringWriter sw = new StringWriter(); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); String toEnqueue = sw.toString(); if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { - log.warn("The following record resulted 
in empty item for the feeding queue: " + resultXml); + log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml); } else { recordQueue.add(sw.toString()); } @@ -274,9 +275,9 @@ public class RestIterator implements Iterator { String[] resumptionKeyValue = arrayUrlArgStr.split("="); if (isInteger(resumptionKeyValue[1])) { urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); - log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); + log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize); } else { - log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); + log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]); } } } @@ -295,7 +296,7 @@ public class RestIterator implements Iterator { discoverResultSize += nodeList.getLength(); } } - log.info("discoverResultSize: {}", discoverResultSize); + log.info("discoverResultSize: {}", discoverResultSize); break; case "pagination": diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c7201a267..a01703675 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -67,10 +67,10 @@ public class TransformSparkJobNode { log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); - log.info(String.format("isLookupUrl: %s", isLookupUrl)); + log.info("isLookupUrl: {}", isLookupUrl); final String dateOfTransformation = parser.get("dateOfTransformation"); - log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + log.info("dateOfTransformation: {}", dateOfTransformation); final Integer rpt = Optional 
.ofNullable(parser.get("recordsPerTask")) @@ -129,9 +129,9 @@ public class TransformSparkJobNode { .map((Function) x::call); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); - log.info("Transformed item " + ct.getProcessedItems().count()); - log.info("Total item " + ct.getTotalItems().count()); - log.info("Transformation Error item " + ct.getErrorItems().count()); + log.info("Transformed item {}", ct.getProcessedItems().count()); + log.info("Total item {}", ct.getTotalItems().count()); + log.info("Transformation Error item {}", ct.getErrorItems().count()); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); writeHdfsFile( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 096d0e289..e93f3b518 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + public static final String TRULE_XQUERY = "for $x in 
collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') " + + + "where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + + private TransformationFactory() { + } public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -27,7 +33,7 @@ public class TransformationFactory { try { final String transformationPlugin = jobArgument.get("transformationPlugin"); - log.info("Transformation plugin required " + transformationPlugin); + log.info("Transformation plugin required {}", transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { final String transformationRuleId = jobArgument.get("transformationRuleId"); @@ -38,7 +44,7 @@ public class TransformationFactory { final String transformationRule = queryTransformationRuleFromIS( transformationRuleId, isLookupService); - final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); + final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, vocabularies); @@ -46,7 +52,6 @@ public class TransformationFactory { default: throw new DnetTransformationException( "transformation plugin does not exists for " + transformationPlugin); - } } catch (Throwable e) { @@ -55,9 +60,9 @@ public class TransformationFactory { } private static String queryTransformationRuleFromIS(final String transformationRuleId, - final ISLookUpService isLookUpService) throws Exception { + final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException { final String query = String.format(TRULE_XQUERY, transformationRuleId); - System.out.println("asking query to IS: " + query); + log.info("asking query to IS: {}", query); List result = isLookUpService.quickSearchProfile(query); if (result == null || 
result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 9da0747e6..3d57b966f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import java.io.Serializable; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java index e3d588858..1aa549c09 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java @@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable { private static final Set particles = null; - public PersonCleaner() { - - } - private String normalize(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\s+-\\s+", "-"); - -// s = s.replaceAll("[\\W&&[^,-]]", " "); - -// System.out.println("class Person: s: " + s); - -// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replace("\\d", " "); 
s = s.replace("\\n", " "); @@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { s = s.replaceAll("\\s+", " "); if (s.contains(",")) { - // System.out.println("class Person: s: " + s); - String[] arr = s.split(","); if (arr.length == 1) { @@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } else if (arr.length > 1) { surname = splitTerms(arr[0]); firstname = splitTermsFirstName(arr[1]); -// System.out.println("class Person: surname: " + surname); -// System.out.println("class Person: firstname: " + firstname); - fullname.addAll(surname); fullname.addAll(firstname); } @@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini firstname = fullname.subList(0, lastInitialPosition + 1); - System.out.println("name: " + firstname); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI for (String term : fullname) { @@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } private List splitTerms(String s) { - if (particles == null) { - // particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); - } - List list = Lists.newArrayList(); for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { - // if (!particles.contains(part.toLowerCase())) { list.add(part); - - // } } return list; } @@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { public String getNormalisedFullname() { return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : Joiner.on(" ").join(fullname); - // return isAccurate() ? 
- // Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : - // Joiner.on(" ").join(fullname); } public List getCapitalSurname() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 43291e5de..acf48ccc5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.transformation.xslt; -import java.io.ByteArrayInputStream; import java.io.Serializable; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction, Serializable { - public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform"; - private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; + private static final String DATASOURCE_ID_PARAM = "varDataSourceId"; - private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; + private static final String DATASOURCE_NAME_PARAM = "varOfficialName"; private final AggregationCounter aggregationCounter; @@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index 7200d2896..f2158748b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -67,7 +69,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOne() throws Exception { + void matchOne() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOneWithTwo() throws Exception { + void matchOneWithTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest { List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); String tmp_influence = tmp_ds.get(0).getString(0); - Assertions - .assertTrue( - 
"1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); tmp_influence = tmp_ds.get(1).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); - Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); + assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); } @Test - public void matchTwo() throws Exception { + void matchTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 2); + assertEquals(2, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index da5beecf9..dd7e1910f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -9,10 +9,10 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; -public class CSVParserTest { +class CSVParserTest { @Test - public void readProgrammeTest() throws Exception { + void readProgrammeTest() throws Exception { 
String programmecsv = IOUtils .toString( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index b7155bc3a..cc36421a0 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; public class EXCELParserTest { private static Path workingDir; - private HttpConnector2 httpConnector = new HttpConnector2(); + private final HttpConnector2 httpConnector = new HttpConnector2(); private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll @@ -30,7 +30,7 @@ public class EXCELParserTest { } @Test - public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, + void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, IllegalAccessException, InstantiationException { EXCELParser excelParser = new EXCELParser(); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 256dc0521..f128b5610 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { PrepareProgramme .main( new String[] { diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 0db3485f5..f0f3532aa 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -66,7 +66,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProjectTest() throws Exception { + void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 42e494681..a77dbace4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -69,7 +69,7 @@ public class SparkUpdateProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { SparkAtomicActionJob .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index f16901cb4..aa11f4ab5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror; import java.io.FileInputStream; 
import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); final Organization org = GenerateRorActionSetJob.convertRorOrg(r); - System.out.println(mapper.writeValueAsString(org)); + final String s = mapper.writeValueAsString(org); + Assertions.assertTrue(StringUtils.isNotBlank(s)); + System.out.println(s); } @Test @@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - GenerateRorActionSetJob.convertRorOrg(r); + Organization o = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertNotNull(o); + Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index afb6ae6a1..633a47379 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + void testGenerateNativeStoreSparkJobRefresh() throws Exception { MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); @@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest 
extends AbstractVocabularyTest { @Test @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + void testGenerateNativeStoreSparkJobIncremental() throws Exception { MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); @@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(3) - public void testTransformSparkJob() throws Exception { + void testTransformSparkJob() throws Exception { setUpVocabulary(); @@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testJSONSerialization() throws Exception { + void testJSONSerialization() throws Exception { final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); System.out.println("s = " + s); final ObjectMapper mapper = new ObjectMapper(); @@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testGenerationMetadataRecord() throws Exception { + void testGenerationMetadataRecord() throws Exception { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); @@ -236,7 +236,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final MetadataRecord record = GenerateNativeStoreSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index efe925175..f2b873e10 100644 --- 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.collection.HttpClientParams; * @author js, Andreas Czerniak * */ -public class RestCollectorPluginTest { +class RestCollectorPluginTest { private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); @@ -65,7 +65,7 @@ public class RestCollectorPluginTest { @Disabled @Test - public void test() throws CollectorException { + void test() throws CollectorException { AtomicInteger i = new AtomicInteger(0); final Stream stream = rcp.collect(api, new AggregatorReport()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java index b5ea5f069..f52f4632a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.collector.worker; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.collection.ApiDescriptor; @Disabled -public class CollectorWorkerApplicationTests { +class CollectorWorkerApplicationTests { @Test - public void testCollectionOAI() throws Exception { + void testCollectionOAI() throws Exception { final ApiDescriptor api = new ApiDescriptor(); api.setId("oai"); api.setProtocol("oai"); diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 948a8f93b..7bd7baaea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) -public class TransformationJobTest extends AbstractVocabularyTest { +class TransformationJobTest extends AbstractVocabularyTest { private SparkConf sparkConf; @@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Date cleaner") - public void testDateCleaner() throws Exception { + void testDateCleaner() throws Exception { DateCleaner dc = new DateCleaner(); assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("2002-09-20", dc.clean("20-09-2002")); @@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") - public void testTransformSaxonHE() throws Exception { + void testTransformSaxonHE() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") - public void testTransformITGv4Zenodo() throws Exception { + void testTransformITGv4Zenodo() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -97,7 +97,7 @@ public class TransformationJobTest extends 
AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") - public void testTransformMostlyUsedScript() throws Exception { + void testTransformMostlyUsedScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; @@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") - public void testTransformRestScript() throws Exception { + void testTransformRestScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; @@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") - public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { + void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { @@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main") - public void transformTest(@TempDir Path testDir) throws Exception { + void transformTest(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 2caa66db4..38ffd28fe 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ 
b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable { private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " + "target_type, unnest(original_target_objects) as target, " + "relationship FROM blacklist WHERE status = 'ACCEPTED'"; @@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable { dbPassword)) { log.info("Processing blacklist..."); - rbl.execute(query, rbl::processBlacklistEntry); + rbl.execute(QUERY, rbl::processBlacklistEntry); } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); @@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable { return Arrays.asList(direct, inverse); - } catch (final Exception e) { + } catch (final SQLException e) { throw new RuntimeException(e); } } @@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable { public ReadBlacklistFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, 
dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { @@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable { try { writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); - } catch (final Exception e) { + } catch (final IOException e) { throw new RuntimeException(e); } } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 91bcb9d1c..38ef63d27 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob { .map((MapFunction, Relation>) c -> { Relation ir = c._1(); Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; - } + if (obl.isPresent() && ir.equals(obl.get())) { + return null; } return ir; }, Encoders.bean(Relation.class)) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 585848589..058ea271c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -62,7 +62,7 @@ public class BlackListTest { } @Test - public void noRemoveTest() 
throws Exception { + void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -89,7 +89,7 @@ public class BlackListTest { } @Test - public void removeNoMergeMatchTest() throws Exception { + void removeNoMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -128,7 +128,7 @@ public class BlackListTest { } @Test - public void removeMergeMatchTest() throws Exception { + void removeMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 429eb7d11..584438d44 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; +import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public class EventFactory { - private final static String PRODUCER_ID = "OpenAIRE"; + private static final String PRODUCER_ID = "OpenAIRE"; - private final static String[] DATE_PATTERNS = { + private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" }; + private EventFactory() { + } + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final Event res = new Event(); @@ -61,7 +66,7 @@ public class EventFactory { map.setTargetResultId(target.getOpenaireId()); final List titles = target.getTitles(); - if 
(titles.size() > 0) { + if (!titles.isEmpty()) { map.setTargetResultTitle(titles.get(0)); } @@ -70,8 +75,12 @@ public class EventFactory { map.setTargetDateofacceptance(date); } - map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); - map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); + map + .setTargetSubjects( + target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList())); + map + .setTargetAuthors( + target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList())); // PROVENANCE INFO map.setTrust(updateInfo.getTrust()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java index 89fc2e703..f0aa6491f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java @@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.TypedColumn; import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; @@ -88,8 +84,7 @@ class CountAggregator extends Aggregator, Tuple2 merge(final Tuple2 arg0, final Tuple2 arg1) { - final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); - return new Tuple2<>(s, 
arg0._2 + arg1._2); + return doMerge(arg0, arg1); } @Override @@ -99,6 +94,10 @@ class CountAggregator extends Aggregator, Tuple2 reduce(final Tuple2 arg0, final Tuple2 arg1) { + return doMerge(arg0, arg1); + } + + private Tuple2 doMerge(final Tuple2 arg0, final Tuple2 arg1) { final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return new Tuple2<>(s, arg0._2 + arg1._2); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java index 05ff2aa38..0fbc763e0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa; +import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -98,7 +99,6 @@ public class IndexEventSubsetJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -114,11 +114,11 @@ public class IndexEventSubsetJob { log.info("*** Deleting old events"); final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000); - log.info("*** Deleted events: " + message); + log.info("*** Deleted events: {}", message); } - private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce..e8ef5dd3e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -33,11 +33,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; -import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.model.*; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -89,9 +85,9 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: " + subscriptions.size()); + log.info("Number of subscriptions: {}", subscriptions.size()); - if (subscriptions.size() > 0) { + if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils @@ -106,7 +102,6 @@ public class IndexNotificationsJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -122,7 +117,7 @@ public class IndexNotificationsJob { log.info("*** Deleting old notifications"); final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); + log.info("*** Deleted 
notifications: {}", message); log.info("*** sendNotifications (emails, ...)"); sendNotifications(brokerApiBaseUrl, startTime - 1000); @@ -174,28 +169,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && !conditions + if (conditions.containsKey("targetDateofacceptance") && conditions .get("targetDateofacceptance") .stream() - .anyMatch( + .noneMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && !conditions + && conditions .get("targetResultTitle") .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && !conditions + && conditions .get("targetAuthors") .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -207,7 +202,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -222,7 +217,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java index 380a689e4..0c74d8a6d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java @@ -7,6 +7,7 @@ import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; @@ -61,7 +62,7 @@ public class IndexOnESJob { final JavaRDD inputRdd = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map(IndexOnESJob::eventAsJsonString, Encoders.STRING()) + .map((MapFunction) IndexOnESJob::eventAsJsonString, Encoders.STRING()) .javaRDD(); final Map esCfg = new HashMap<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index e061c0d3b..b5c891bb8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -77,9 +77,8 @@ public class PartitionEventsByDsIdJob { } log.info("validOpendoarIds: {}", validOpendoarIds); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - - ClusterUtils + runWithSparkSession( + conf, isSparkSessionManaged, spark -> ClusterUtils .readPath(spark, eventsPath, Event.class) .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) .filter((FilterFunction) e -> 
e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) @@ -92,9 +91,7 @@ public class PartitionEventsByDsIdJob { .partitionBy("group") .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(partitionPath); - - }); + .json(partitionPath)); renameSubDirs(partitionPath); } @@ -102,14 +99,14 @@ public class PartitionEventsByDsIdJob { private static void renameSubDirs(final String path) throws IOException { final FileSystem fs = FileSystem.get(new Configuration()); - log.info("** Renaming subdirs of " + path); + log.info("** Renaming subdirs of {}", path); for (final FileStatus fileStatus : fs.listStatus(new Path(path))) { if (fileStatus.isDirectory()) { final Path oldPath = fileStatus.getPath(); final String oldName = oldPath.getName(); if (oldName.contains("=")) { final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, "=")); - log.info(" * " + oldPath.getName() + " -> " + newPath.getName()); + log.info(" * {} -> {}", oldPath.getName(), newPath.getName()); fs.rename(oldPath, newPath); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java index 61ab5e250..2a247b7aa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java @@ -105,7 +105,7 @@ public class PrepareRelatedDatasourcesJob { .filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getId())) .filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference()) .map( - (MapFunction) r -> DatasourceRelationsAccumulator.calculateTuples(r), + (MapFunction) DatasourceRelationsAccumulator::calculateTuples, Encoders.bean(DatasourceRelationsAccumulator.class)) .flatMap( (FlatMapFunction>) acc -> acc diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index fba82aa8c..87fed7db7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -26,7 +26,7 @@ public abstract class UpdateMatcher { private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public UpdateMatcher(final int maxNumber, final Function topicFunction, + protected UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { this.maxNumber = maxNumber; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index 2f73a2448..88ad48178 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { - public AbstractEnrichMissingDataset(final Topic topic) { + protected AbstractEnrichMissingDataset(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedDataset::getOpenaireId); } protected abstract boolean filterByType(String relType); diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index ab2735f2a..440602772 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 85086a6df..2e523da2f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -18,7 +18,7 @@ public class EnrichMoreProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override @@ -32,7 +32,7 @@ public class EnrichMoreProject extends UpdateMatcher { final Set existingProjects = target .getProjects() .stream() - .map(p -> p.getOpenaireId()) + .map(OaBrokerProject::getOpenaireId) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 7ba3e5e02..a709eea30 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { - public AbstractEnrichMissingPublication(final Topic topic) { + protected AbstractEnrichMissingPublication(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedPublication::getOpenaireId); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index a638024bc..a75666027 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -16,7 +16,7 @@ public class EnrichMissingSoftware super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a6cd34359..ec340b42f 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -18,7 +18,7 @@ public class EnrichMoreSoftware extends UpdateMatcher { super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index e834d1dde..125eac862 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -20,7 +20,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher { super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), - aut -> aut.getOrcid()); + OaBrokerAuthor::getOrcid); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 4e4003890..f32cec90d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -23,7 +23,7 @@ public class EnrichMissingPid extends UpdateMatcher { protected List findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { - if (target.getPids().size() > 0) { + if (!target.getPids().isEmpty()) { return 
Arrays.asList(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index cb3ea5464..f07bbd52f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -35,7 +35,7 @@ public class EnrichMissingSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMissingSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMissingSubject extends UpdateMatcher { final Set existingSubject = target .getSubjects() .stream() - .map(s -> subjectAsString(s)) + .map(EnrichMissingSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 46f6fa80c..585531095 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -33,7 +33,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { .getInstances() .stream() .filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS)) - .map(i -> i.getUrl()) + .map(OaBrokerInstance::getUrl) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java 
index 609437b9d..a98b96b99 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -18,7 +18,7 @@ public class EnrichMorePid extends UpdateMatcher { super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), - pid -> pidAsString(pid)); + EnrichMorePid::pidAsString); } @Override @@ -32,7 +32,7 @@ public class EnrichMorePid extends UpdateMatcher { final Set existingPids = target .getPids() .stream() - .map(pid -> pidAsString(pid)) + .map(EnrichMorePid::pidAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 1f6edf96e..b62b509c7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -35,7 +35,7 @@ public class EnrichMoreSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMoreSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMoreSubject extends UpdateMatcher { final Set existingSubjects = target .getSubjects() .stream() - .map(pid -> subjectAsString(pid)) + .map(EnrichMoreSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 2055a014e..790ca4e61 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java 
+++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -12,6 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class BrokerConstants { + private BrokerConstants() { + } + public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index 7c4ca1d22..2e9c03990 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -24,6 +24,9 @@ public class ClusterUtils { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private ClusterUtils() { + } + public static void createDirIfMissing(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 6f0a52244..bc37203d3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -13,8 +14,6 @@ import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Function; - import 
eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; @@ -46,6 +45,9 @@ public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); + private ConversionUtils() { + } + public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); @@ -69,7 +71,7 @@ public class ConversionUtils { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } - public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } @@ -100,7 +102,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { + public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } @@ -142,12 +144,12 @@ public class ConversionUtils { final String pids = author.getPid() != null ? 
author .getPid() .stream() - .filter(pid -> pid != null) + .filter(Objects::nonNull) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) - .map(pid -> pid.getValue()) - .map(pid -> cleanOrcid(pid)) + .map(StructuredProperty::getValue) + .map(ConversionUtils::cleanOrcid) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; @@ -187,7 +189,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { + public static OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } @@ -206,14 +208,14 @@ public class ConversionUtils { res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { - log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); + log.error("Error in record {}: invalid fundingtree: {}", p.getId(), ftree); } } return res; } - public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { + public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } @@ -228,7 +230,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { + public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } @@ -241,7 +243,7 @@ public class ConversionUtils { } private static String first(final List list) { - return list != null && list.size() > 0 ? list.get(0) : null; + return list != null && !list.isEmpty() ? 
list.get(0) : null; } private static String kvValue(final KeyValue kv) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java index c693be93c..658a42ac1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple3; @@ -39,7 +41,7 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set collectedFromSet = r .getCollectedfrom() .stream() - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .collect(Collectors.toSet()); @@ -47,10 +49,10 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set hostedBySet = r .getInstance() .stream() - .map(i -> i.getHostedby()) + .map(Instance::getHostedby) .filter(Objects::nonNull) .filter(kv -> !StringUtils.equalsIgnoreCase(kv.getValue(), "Unknown Repository")) - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .filter(id -> !collectedFromSet.contains(id)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 103751f95..b2214e07e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -41,8 
+41,6 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; public class EventFinder { - private static final Logger log = LoggerFactory.getLogger(EventFinder.class); - private static final List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); @@ -72,6 +70,9 @@ public class EventFinder { matchers.add(new EnrichMissingDatasetIsSupplementedBy()); } + private EventFinder() { + } + public static EventGroup generateEvents(final ResultGroup results, final Set dsIdWhitelist, final Set dsIdBlacklist, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index adb1c753b..cf3562193 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -12,6 +12,9 @@ public class SubscriptionUtils { private static final long ONE_DAY = 86_400_000; + private SubscriptionUtils() { + } + public static boolean verifyListSimilar(final List list, final String value) { return list.stream().anyMatch(s -> verifySimilar(s, value)); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 72fe1b204..a49801f32 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -30,7 +30,9 @@ public class TrustUtils { } catch (final IOException e) { log.error("Error loading dedupConfig, e"); } + } + private TrustUtils() { } protected static float calculateTrust(final OaBrokerMainEntity r1, final OaBrokerMainEntity r2) { diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 5a9cb5e09..d29414e52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -88,14 +88,14 @@ public final class UpdateInfo { .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getName()) + .map(OaBrokerRelatedDatasource::getName) .findFirst() .orElse(""); final String provType = getSource() .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getType()) + .map(OaBrokerRelatedDatasource::getType) .findFirst() .orElse(""); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 8fa95abe5..45bfc785f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -72,7 +73,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test @@ -127,7 +128,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + 
assertEquals(1, list.size()); } } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 77a19af4c..550ded9f4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -32,7 +33,7 @@ class EnrichMissingPublicationDateTest { final OaBrokerMainEntity target = new OaBrokerMainEntity(); source.setPublicationdate("2018"); final List list = matcher.findDifferences(source, target); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 974baa28b..a8bc03e31 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,67 +9,67 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; @Test - public void rescaleTest_1() { + void rescaleTest_1() { verifyValue(-0.3, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_2() { + void rescaleTest_2() { verifyValue(0.0, 
BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_3() { + void rescaleTest_3() { verifyValue(0.5, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_4() { + void rescaleTest_4() { verifyValue(0.95, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_5() { + void rescaleTest_5() { verifyValue(0.96, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_6() { + void rescaleTest_6() { verifyValue(0.97, 0.3f); } @Test - public void rescaleTest_7() { + void rescaleTest_7() { verifyValue(0.98, 0.45f); } @Test - public void rescaleTest_8() { + void rescaleTest_8() { verifyValue(0.99, 0.6f); } @Test - public void rescaleTest_9() { + void rescaleTest_9() { verifyValue(1.00, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_10() { + void rescaleTest_10() { verifyValue(1.01, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_11() { + void rescaleTest_11() { verifyValue(2.00, BrokerConstants.MAX_TRUST); } @Test - public void test() throws Exception { + void test() { final OaBrokerMainEntity r1 = new OaBrokerMainEntity(); r1.getTitles().add("D-NET Service Package: Data Import"); r1.getPids().add(new OaBrokerTypedValue("doi", "123")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 647a1b9c8..6a9b21b00 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -20,6 +20,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,23 +44,26 @@ abstract class AbstractSparkAction implements 
Serializable { protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public ArgumentApplicationParser parser; // parameters for the spark action - public SparkSession spark; // the spark session + public final ArgumentApplicationParser parser; // parameters for the spark action + public final SparkSession spark; // the spark session - public AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { + protected AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { this.parser = parser; this.spark = spark; } public List getConfigurations(ISLookUpService isLookUpService, String orchestrator) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); @@ -93,7 +97,7 @@ abstract class AbstractSparkAction implements Serializable { } abstract void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException; + throws DocumentException, IOException, ISLookUpException, SAXException; protected static SparkSession getSparkSession(SparkConf conf) { return SparkSession.builder().config(conf).getOrCreate(); @@ -139,9 +143,7 @@ abstract class AbstractSparkAction implements Serializable { c -> c .stream() .filter(Objects::nonNull) - .filter(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())) - 
.findFirst() - .isPresent()) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) .orElse(false); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java index 558c7c440..9d767c4d2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java @@ -23,6 +23,9 @@ public class DatePicker { private static final int YEAR_LB = 1300; private static final int YEAR_UB = Year.now().getValue() + 5; + private DatePicker() { + } + public static Field pick(final Collection dateofacceptance) { final Map frequencies = dateofacceptance @@ -61,7 +64,7 @@ public class DatePicker { .entrySet() .stream() .filter(e -> e.getValue() >= acceptThreshold) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); // cannot find strong majority diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 5ba6f6e6d..d65853aff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -10,14 +10,11 @@ import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import 
eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -25,11 +22,12 @@ import scala.Tuple2; public class DedupRecordFactory { - private static final Logger log = LoggerFactory.getLogger(DedupRecordFactory.class); - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private DedupRecordFactory() { + } + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, @@ -67,7 +65,7 @@ public class DedupRecordFactory { value._1()._1(), value._2()._2()), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) .groupByKey( - (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) + (MapFunction, String>) Tuple2::_1, Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, values) -> entityMerger(key, values, ts, dataInfo, clazz), @@ -91,7 +89,7 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 5806e9fa4..d79d24653 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -10,6 +10,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Sets; @@ -25,6 +26,9 @@ public class DedupUtility { public static final String 
OPENORGS_ID_PREFIX = "openorgs____"; public static final String CORDA_ID_PREFIX = "corda"; + private DedupUtility() { + } + public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -92,14 +96,16 @@ public class DedupUtility { } public static List getConfigurations(String isLookUpUrl, String orchestrator) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); final List configurations = new ArrayList<>(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 5506b5470..ea738836b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -33,9 +34,11 @@ public class DispatchEntitiesSparkJob { String jsonConfiguration = IOUtils .toString( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - 
"/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")); + Objects + .requireNonNull( + DispatchEntitiesSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -54,6 +57,7 @@ public class DispatchEntitiesSparkJob { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..a19f86380 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -42,7 +42,7 @@ public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private final static String ID_JPATH = "$.id"; + private static final String ID_JPATH = "$.id"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -92,7 +92,7 @@ public class GroupEntitiesSparkJob { spark .read() .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) s -> parseOaf(s), Encoders.kryo(OafEntity.class)) + .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .agg(aggregator) @@ -188,7 +188,7 @@ public class GroupEntitiesSparkJob { 
try { return OBJECT_MAPPER.readValue(s, clazz); } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index dd9b16790..81cd30f88 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -15,13 +15,13 @@ public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) public static String generate(List> pids, String defaultID) { - if (pids == null || pids.size() == 0) + if (pids == null || pids.isEmpty()) return defaultID; Identifier bp = pids .stream() .min(Identifier::compareTo) - .get(); + .orElseThrow(() -> new IllegalStateException("unable to generate id")); String prefix = substringBefore(bp.getOriginalID(), "|"); String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java index 1e13485e5..c9c9dd8fe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java @@ -9,7 +9,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,6 +16,7 @@ import 
org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -63,7 +63,7 @@ public class SparkBlockStats extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 0aaa1e662..93027e99a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -6,7 +6,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,8 +16,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -78,7 +75,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction { saveParquet(rawRels, outputPath, SaveMode.Append); - log.info("Copied " + rawRels.count() + " Similarity Relations"); + log.info("Copied 
{} Similarity Relations", rawRels.count()); } private boolean filterOpenorgsRels(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9ece43891..bf0b7f687 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -2,34 +2,21 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.Dataset; -import org.dom4j.DocumentException; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; public class SparkCopyRelationsNoOpenorgs extends 
AbstractSparkAction { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java index b41507e95..6989ec54b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.EntityType; @@ -54,7 +55,7 @@ public class SparkCreateDedupRecord extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String isLookUpUrl = parser.get("isLookUpUrl"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index bfc605039..95e3dff28 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -24,6 +24,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; @@ -76,7 +77,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { @Override public void 
run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String workingPath = parser.get("workingPath"); @@ -161,11 +162,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private ConnectedComponent generateID(String key, Iterator> values) { - List> identifiers = Lists.newArrayList(values).stream().map(v -> { - T entity = v._2(); - Identifier identifier = Identifier.newInstance(entity); - return identifier; - }).collect(Collectors.toList()); + List> identifiers = Lists + .newArrayList(values) + .stream() + .map(v -> Identifier.newInstance(v._2())) + .collect(Collectors.toList()); String rootID = IdGenerator.generate(identifiers, key); @@ -235,7 +236,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { info.setProvenanceaction(provenanceAction); // TODO calculate the trust value based on the similarity score of the elements in the CC - // info.setTrust(); r.setDataInfo(info); return r; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index df3db7add..8e5e9fd69 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -18,9 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import 
eu.dnetlib.dhp.utils.ISLookupClientFactory; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 884967364..f89f634b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -26,8 +26,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.FieldListImpl; -import eu.dnetlib.pace.model.FieldValueImpl; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; @@ -56,7 +54,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); @@ -110,9 +108,6 @@ public class SparkCreateSimRels extends AbstractSparkAction { 
Encoders.bean(Relation.class)); saveParquet(simRels, outputPath, SaveMode.Overwrite); - - log.info("Generated " + simRels.count() + " Similarity Relations"); - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java index 657d5a832..d12048b02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java @@ -6,10 +6,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; @@ -23,9 +19,9 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -84,8 +80,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + 
final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath); @@ -115,7 +112,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(r -> filterRels(r, "organization")) + .filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization))) // take the worst id of the diffrel: .mapToPair(rel -> { if (DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) > 0) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index 08b39793e..61325ab50 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -32,6 +33,7 @@ import scala.Tuple3; public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class); + public static final String GROUP_PREFIX = "group::"; public SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) { super(parser, spark); @@ -41,7 +43,7 @@ public class SparkPrepareOrgRels extends 
AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser.parseArgument(args); @@ -81,8 +83,9 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset relations = createRelations(spark, mergeRelPath, relationPath, entityPath); @@ -168,7 +171,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .map(g -> Lists.newArrayList(g._2())) .filter(l -> l.size() > 1) .flatMap(l -> { - String groupId = "group::" + UUID.randomUUID(); + String groupId = GROUP_PREFIX + UUID.randomUUID(); List ids = sortIds(l); // sort IDs by type List, String>> rels = new ArrayList<>(); String source = ids.get(0); @@ -192,7 +195,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .collect(Collectors.toList()))) // : take only relations with only the group_id, it // means they are correct. If the diffRel is present the relation has to be removed - .filter(g -> g._2().size() == 1 && g._2().get(0).contains("group::")) + .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX)) .map( t -> new Tuple3<>( t._1().split("@@@")[0], @@ -255,7 +258,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { // Sort IDs basing on the type. 
Priority: 1) openorgs, 2)corda, 3)alphabetic public static List sortIds(List ids) { - ids.sort((o1, o2) -> DedupUtility.compareOpenOrgIds(o1, o2)); + ids.sort(DedupUtility::compareOpenOrgIds); return ids; } @@ -289,9 +292,10 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List> rels = new ArrayList<>(); for (String id1 : g._2()) { for (String id2 : g._2()) { - if (!id1.equals(id2)) - if (id1.contains(DedupUtility.OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh")) - rels.add(new Tuple2<>(id1, id2)); + if (!id1.equals(id2) && id1.contains(DedupUtility.OPENORGS_ID_PREFIX) + && !id2.contains("openorgsmesh")) { + rels.add(new Tuple2<>(id1, id2)); + } } } return rels.iterator(); @@ -310,7 +314,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getWebsiteurl() != null ? r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1(), + GROUP_PREFIX + r._1()._1(), structuredPropertyListToString(r._2()._2().getPid()), parseECField(r._2()._2().getEclegalbody()), parseECField(r._2()._2().getEclegalperson()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 220b0f483..0fa41bd6d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -40,7 +40,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPropagateRelation.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); @@ -113,7 +113,7 @@ public 
class SparkPropagateRelation extends AbstractSparkAction { .join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), Encoders.STRING()) .agg(new RelationAggregator().toColumn()) - .map((MapFunction, Relation>) t -> t._2(), Encoders.bean(Relation.class)); + .map((MapFunction, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); } // redirect the relations to the dedupID @@ -163,7 +163,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { private FilterFunction getRelationFilterFunction() { return r -> StringUtils.isNotBlank(r.getSource()) || StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelClass()) || + StringUtils.isNotBlank(r.getRelType()) || StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getRelClass()); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index fdef7f77d..49021ab58 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -13,7 +14,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -26,8 +26,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import 
eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -72,83 +74,76 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // for each entity - ModelSupport.entityTypes - .forEach( - (type, clazz) -> { - final String outputPath = dedupGraphPath + "/" + type; - removeOutputDir(spark, outputPath); - final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); - if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + final EntityType type = e.getKey(); + final Class clazz = e.getValue(); + final String outputPath = dedupGraphPath + "/" + type; + removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - if (mergeRelExists(workingPath, type.toString())) { + if (mergeRelExists(workingPath, type.toString())) { - final String mergeRelPath = DedupUtility - .createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + 
.createDedupRecordPath(workingPath, "*", type.toString()); - final Dataset rel = spark - .read() - .load(mergeRelPath) - .as(Encoders.bean(Relation.class)); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + .map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); - - } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); - } - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); + } + } } - public boolean mergeRelExists(String basePath, String 
entity) { + public boolean mergeRelExists(String basePath, String entity) throws IOException { boolean result = false; - try { - FileSystem fileSystem = FileSystem.get(new Configuration()); - FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); + FileSystem fileSystem = FileSystem.get(new Configuration()); + FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); - for (FileStatus fs : fileStatuses) { - if (fs.isDirectory()) - if (fileSystem - .exists( - new Path(DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)))) - result = true; + for (FileStatus fs : fileStatuses) { + final Path mergeRelPath = new Path( + DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)); + if (fs.isDirectory() && fileSystem.exists(mergeRelPath)) { + result = true; } - - return result; - } catch (IOException e) { - throw new RuntimeException(e); } + + return result; } private static String updateDeletedByInference( diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java index 3a986a9dd..3e564052e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java @@ -3,29 +3,15 @@ package eu.dnetlib.dhp.oa.dedup.graph; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; -import org.apache.spark.api.java.function.MapFunction; import org.codehaus.jackson.annotate.JsonIgnore; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import 
com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.oa.dedup.IdGenerator; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.PaceException; public class ConnectedComponent implements Serializable { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index e821d7ef5..a25a853ef 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; -public class Identifier implements Serializable, Comparable { +public class Identifier implements Serializable, Comparable> { public static final String DATE_FORMAT = "yyyy-MM-dd"; public static final String BASE_DATE = "2000-01-01"; @@ -29,8 +29,8 @@ public class Identifier implements Serializable, Comparable // cached date value private Date date = null; - public static Identifier newInstance(T entity) { - return new Identifier(entity); + public static Identifier newInstance(T entity) { + return new Identifier<>(entity); } public Identifier(T entity) { @@ -88,7 +88,7 @@ public class Identifier implements Serializable, Comparable } @Override - public int compareTo(Identifier i) { + public int compareTo(Identifier i) { // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) // 
alphabetical order of the originalID diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java index 7c58c375a..daea29a07 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java @@ -10,12 +10,12 @@ import org.junit.jupiter.api.Test; import com.clearspring.analytics.util.Lists; -public class DatePickerTest { +class DatePickerTest { Collection dates = Lists.newArrayList(); @Test - public void testPickISO() { + void testPickISO() { dates.add("2016-01-01T12:00:00Z"); dates.add("2016-06-16T12:00:00Z"); dates.add("2020-01-01T12:00:00Z"); @@ -24,7 +24,7 @@ public class DatePickerTest { } @Test - public void testPickSimple() { + void testPickSimple() { dates.add("2016-01-01"); dates.add("2016-06-16"); dates.add("2020-01-01"); @@ -33,7 +33,7 @@ public class DatePickerTest { } @Test - public void testPickFrequent() { + void testPickFrequent() { dates.add("2016-02-01"); dates.add("2016-02-01"); dates.add("2016-02-01"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 80154fbb7..e86f91f99 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class EntityMergerTest implements Serializable { +class EntityMergerTest implements Serializable { private List> publications; private List> publications2; @@ -54,7 +54,7 @@ public class EntityMergerTest implements Serializable { } 
@Test - public void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +69,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -125,7 +125,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -137,7 +137,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -147,7 +147,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -157,7 +157,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void 
publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 1a279fac7..2d6637882 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -56,7 +56,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest1() { + void generateIdTest1() { String id1 = IdGenerator.generate(bestIds, "50|defaultID"); System.out @@ -66,7 +66,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest2() { + void generateIdTest2() { String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); @@ -82,7 +82,7 @@ public class IdGeneratorTest { } @Test - public void generateIdOrganizationTest() { + void generateIdOrganizationTest() { String id1 = IdGenerator.generate(bestIdsOrg, "20|defaultID"); assertEquals("20|openorgs____::599c15a70fcb03be6ba08f75f14d6076", id1); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bf4913056..2f992bd78 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.List; import org.apache.commons.io.FileUtils; 
import org.apache.commons.io.IOUtils; @@ -148,7 +147,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -196,14 +195,14 @@ public class SparkDedupTest implements Serializable { assertEquals(3082, orgs_simrel); assertEquals(7036, pubs_simrel); - assertEquals(344, sw_simrel); + assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6750, orp_simrel); } @Test @Order(2) - public void cutMergeRelsTest() throws Exception { + void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -299,7 +298,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -347,7 +346,7 @@ public class SparkDedupTest implements Serializable { assertEquals(1272, orgs_mergerel); assertEquals(1438, pubs_mergerel); - assertEquals(288, sw_mergerel); + assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(718, orp_mergerel); @@ -355,7 +354,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(4) - public void createDedupRecordTest() throws Exception { + void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -402,7 +401,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(5) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -518,7 +517,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(6) - public void propagateRelationTest() throws Exception { + void 
propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -536,7 +535,7 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4862, relations); + assertEquals(4860, relations); // check deletedbyinference final Dataset mergeRels = spark @@ -568,7 +567,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(7) - public void testRelations() throws Exception { + void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 97cfab118..9312d83b1 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -67,7 +67,7 @@ public class SparkOpenorgsDedupTest implements Serializable { public static void cleanUp() throws IOException, URISyntaxException { testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) + .get(SparkOpenorgsDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) .toFile() .getAbsolutePath(); testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") @@ -101,7 +101,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); @@ -110,14 +110,14 @@ public class SparkOpenorgsDedupTest implements Serializable { 
.thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); } @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -148,7 +148,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(2) - public void copyOpenorgsSimRels() throws Exception { + void copyOpenorgsSimRels() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -177,7 +177,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -230,11 +230,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(4) - public void prepareOrgRelsTest() throws Exception { + void prepareOrgRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser @@ -313,11 +313,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(5) - public void prepareNewOrgsTest() throws Exception { + void prepareNewOrgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareNewOrgs.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); parser diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java 
index 606dd9e5b..2349ffebe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.lenient; import java.io.File; @@ -12,8 +11,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.Collections; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -32,9 +29,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -110,7 +104,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(1) - public void copyOpenorgsMergeRelTest() throws Exception { + void copyOpenorgsMergeRelTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -143,7 +137,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(2) - public void createOrgsDedupRecordTest() throws Exception { + void createOrgsDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -176,7 +170,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(3) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { 
ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -216,7 +210,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(4) - public void copyRelationsNoOpenorgsTest() throws Exception { + void copyRelationsNoOpenorgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -239,7 +233,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(5) - public void propagateRelationsTest() throws Exception { + void propagateRelationsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 31de8d951..1ba2c717c 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -41,7 +41,7 @@ public class SparkStatsTest implements Serializable { private static final String testActionSetId = "test-orchestrator"; @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void beforeAll() throws IOException, URISyntaxException { testGraphBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) @@ -73,7 +73,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); @@ -82,7 +82,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); @@ -91,7 +91,7 @@ public class SparkStatsTest 
implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); @@ -100,7 +100,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); @@ -109,7 +109,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); @@ -118,18 +118,18 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); } @Test - public void createBlockStatsTest() throws Exception { + void createBlockStatsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/createBlockStats_parameters.json"))); parser @@ -168,10 +168,15 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(549, orgs_blocks); - assertEquals(299, pubs_blocks); + assertEquals(477, orgs_blocks); + assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); - assertEquals(186, ds_blocks); - assertEquals(170, orp_blocks); + assertEquals(191, ds_blocks); + assertEquals(171, orp_blocks); + } + + @AfterAll + public static void tearDown() { + spark.close(); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 1759180d2..7348a3bd2 100644 --- 
a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.dedup.jpath; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; -public class JsonPathTest { +class JsonPathTest { String json = "{\t\"dataInfo\":{\t\t\"invisible\":false,\t\t\"inferred\":false,\t\t\"deletedbyinference\":false,\t\t\"trust\":\"0.810000002384185791\",\t\t\"inferenceprovenance\":\"\",\t\t\"provenanceaction\":{\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t}\t},\t\"lastupdatetimestamp\":1584960968152,\t\"id\":\"20|corda__h2020::9faf23721249f26ac2c16eb857ea1fb9\",\t\"originalId\":[\t\t\"corda__h2020::927957582\"\t],\t\"collectedfrom\":[\t\t{\t\t\t\"key\":\"openaire____::corda_h2020\",\t\t\t\"value\":\"CORDA - COmmon Research DAta Warehouse - Horizon 2020\",\t\t\t\"dataInfo\":null\t\t}\t],\t\"pid\":[\t],\t\"dateofcollection\":\"2016-06-05\",\t\"dateoftransformation\":\"2019-11-19\",\t\"extraInfo\":[\t],\t\"oaiprovenance\":null,\t\"legalshortname\":{\t\t\"value\":\"Comentor 
AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"legalname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"alternativeNames\":[\t],\t\"websiteurl\":{\t\t\"value\":\"http://www.comentor.se\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"logourl\":null,\t\"eclegalbody\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"eclegalperson\":{\t\t\"
value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnonprofit\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecresearchorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"echighereducation\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganizationeurinterests\":{\t\t\"value\":\"fa
lse\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecenterprise\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecsmevalidated\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnutscode\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisi
ble\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"country\":null}"; DedupConfig conf = DedupConfig @@ -283,15 +285,18 @@ public class JsonPathTest { + "}"); @Test - public void testJPath() throws Exception { + void testJPath() { MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(conf, json); + Assertions.assertNotNull(d); + Assertions.assertTrue(StringUtils.isNotBlank(d.getIdentifier())); + System.out.println("d = " + d); } @Test - public void testNull() throws Exception { + void testNull() { final Object p = null; System.out.println((String) p); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java index ee6136b58..c6c207727 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref; import java.io.ByteArrayOutputStream; +import java.util.Objects; import java.util.Optional; import java.util.zip.Inflater; @@ -22,9 +23,11 @@ public class CrossrefImporter { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - CrossrefImporter.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/import_from_es.json"))); + Objects + .requireNonNull( + CrossrefImporter.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/import_from_es.json")))); parser.parseArgument(args); diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java index dcebbbcac..6d6a4ca78 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref; +import java.io.IOException; import java.util.Iterator; import java.util.List; @@ -11,8 +12,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; @@ -57,8 +56,8 @@ public class ESClient implements Iterator { try (CloseableHttpResponse response = client.execute(httpPost)) { return IOUtils.toString(response.getEntity().getContent()); } - } catch (Throwable e) { - throw new RuntimeException("Error on executing request ", e); + } catch (IOException e) { + throw new IllegalStateException("Error on executing request ", e); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index feb540fcd..f725b3222 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -29,8 +29,10 @@ public class ActivitiesDecompressor { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; - public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) - throws Exception { + private ActivitiesDecompressor() { + } + + public 
static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws IOException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); @@ -44,7 +46,7 @@ public class ActivitiesDecompressor { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,8 +54,7 @@ public class ActivitiesDecompressor { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int doiFound = 0; int errorFromOrcidFound = 0; @@ -79,11 +80,11 @@ public class ActivitiesDecompressor { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes()); + WorkData workData = XMLRecordParser.VTDParseWorkData(builder.toString().getBytes()); if (workData != null) { if (workData.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -113,7 +114,7 @@ public class ActivitiesDecompressor { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } @@ -177,11 +178,11 @@ public class ActivitiesDecompressor { counter++; BufferedReader br = new BufferedReader(new InputStreamReader(tais)); String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new 
StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - String xml = buffer.toString(); + String xml = builder.toString(); String[] filenameParts = filename.split("/"); final Text key = new Text( XMLRecordParser diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java index 4de4a0266..99587b16a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,7 +11,6 @@ import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; public class ExtractXMLActivitiesData extends OrcidDSManager { private String outputWorksPath; @@ -22,11 +22,11 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { extractXMLActivitiesData.extractWorks(); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLActivitiesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); @@ -43,7 +43,6 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { private void extractWorks() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = 
initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java index 5c2a35229..4121f3391 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -5,12 +5,13 @@ import java.io.IOException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; +import eu.dnetlib.dhp.parser.utility.VtdException; public class ExtractXMLSummariesData extends OrcidDSManager { @@ -27,7 +28,7 @@ public class ExtractXMLSummariesData extends OrcidDSManager { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLSummariesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); @@ -42,9 +43,8 @@ public class ExtractXMLSummariesData extends OrcidDSManager { Log.info("Output Authors Data: " + outputAuthorsPath); } - public void extractAuthors() throws Exception { + public void extractAuthors() throws IOException, VtdException, ParseException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 3b4033450..7e4869fdd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -22,9 +22,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); } - public void generateAuthorsDOIsData() throws Exception { + public void generateAuthorsDOIsData() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 73a4bfd05..0b4ef279d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -25,9 +25,8 @@ public class OrcidDSManager { orcidDSManager.generateAuthors(); } - public void generateAuthors() throws Exception { + public void generateAuthors() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8cf070213..2b8e42bf6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.FileNotFoundException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -64,7 +65,7 @@ public class SparkDownloadOrcidAuthors { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -82,7 +83,7 @@ public class SparkDownloadOrcidAuthors { JavaPairRDD lamdaFileRDD = sc .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); - logger.info("Data retrieved: " + lamdaFileRDDCount); + logger.info("Data retrieved: {}", lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); @@ -95,7 +96,7 @@ public class SparkDownloadOrcidAuthors { return false; }; - Function, Tuple2> downloadRecordFunction = data -> { + Function, Tuple2> downloadRecordFn = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); final DownloadedRecordData downloaded = new DownloadedRecordData(); @@ -118,14 +119,19 @@ public class SparkDownloadOrcidAuthors { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; 
case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); } @@ -145,33 +151,41 @@ public class SparkDownloadOrcidAuthors { logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); long authorsModifiedCount = authorsModifiedRDD.count(); - logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Authors modified count: {}", authorsModifiedCount); logger.info("Start downloading ..."); - authorsModifiedRDD - .repartition(100) - .map(downloadRecordFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) - .saveAsNewAPIHadoopFile( - workingPath.concat(outputPath), - Text.class, - Text.class, - SequenceFileOutputFormat.class, - sc.hadoopConfiguration()); - logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); - logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + final JavaPairRDD pairRDD = authorsModifiedRDD + .repartition(100) + .map(downloadRecordFn) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); + + saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); + + logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); + logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + 
logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } + private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, + JavaPairRDD pairRDD) { + pairRDD + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); + } + public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { Date modifiedDateDt; Date lastUpdateDt; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 59de7ca80..cab538783 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; -import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; @@ -61,7 +58,7 @@ public class SparkDownloadOrcidWorks { .orElse(Boolean.TRUE); 
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); final String token = parser.get("token"); final String hdfsServerUri = parser.get("hdfsServerUri"); @@ -169,20 +166,25 @@ public class SparkDownloadOrcidWorks { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); logger .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); + "Downloading {} status code: {}", orcidId, + response.getStatusLine().getStatusCode()); } return downloaded.toTuple2(); } @@ -199,24 +201,24 @@ public class SparkDownloadOrcidWorks { .flatMap(retrieveWorkUrlFunction) .repartition(100) .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); - logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); - logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); - logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); - logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); - logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); - logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); - logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); - logger.info("errorLoadingXMLFoundAcc: " + errorLoadingXMLFoundAcc.value().toString()); - logger.info("errorParsingXMLFoundAcc: " + 
errorParsingXMLFoundAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); + logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); + logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); + logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); + logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); + logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); + logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); + logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); + logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index 178d07608..d2fed61ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ 
-3,7 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.*; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -15,7 +16,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -28,10 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { - private static String hdfsServerUri; - private static String workingPath; - private static String outputPath; - private static String lambdaFileName; public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -45,12 +41,14 @@ public class SparkGenLastModifiedSeq { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - hdfsServerUri = parser.get("hdfsServerUri"); - workingPath = parser.get("workingPath"); - outputPath = parser.get("outputPath"); - lambdaFileName = parser.get("lambdaFileName"); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; + + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = parser.get("workingPath"); + final String outputPath = parser.get("outputPath"); + final String lambdaFileName = parser.get("lambdaFileName"); + + final String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + final String lastModifiedDateFromLambdaFileUri 
= "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -64,7 +62,7 @@ public class SparkGenLastModifiedSeq { .concat(workingPath) .concat(outputPath)); Path hdfsreadpath = new Path(lambdaFileUri); - Configuration conf = new Configuration(); + Configuration conf = spark.sparkContext().hadoopConfiguration(); conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 7d9f39d05..3ecb4f5e3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,13 +52,13 @@ public class SparkGenerateDoiAuthorList { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); - logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + logger.info("outputDoiAuthorListPath: {}", outputDoiAuthorListPath); final String authorsPath = parser.get("authorsPath"); - logger.info("authorsPath: ", authorsPath); + logger.info("authorsPath: {}", authorsPath); final String xmlWorksPath = 
parser.get("xmlWorksPath"); - logger.info("xmlWorksPath: ", xmlWorksPath); + logger.info("xmlWorksPath: {}", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -128,8 +127,7 @@ public class SparkGenerateDoiAuthorList { .concat( d1.stream(), d2.stream()); - List mergedAuthors = mergedStream.collect(Collectors.toList()); - return mergedAuthors; + return mergedStream.collect(Collectors.toList()); } if (d1 != null) { return d1; @@ -170,14 +168,6 @@ public class SparkGenerateDoiAuthorList { return authorData; } - private static WorkData loadWorkFromJson(Text orcidId, Text json) { - WorkData workData = new WorkData(); - workData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - workData.setDoi(getJsonValue(jElement, "doi")); - return workData; - } - private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { JsonElement name = null; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java index 51326c610..1727f1825 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; -import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -17,8 +16,10 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import 
org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,11 +36,12 @@ import scala.Tuple2; public class SparkUpdateOrcidAuthors { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +55,6 @@ public class SparkUpdateOrcidAuthors { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -87,7 +88,7 @@ public class SparkUpdateOrcidAuthors { String jsonData = data._2().toString(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { @@ -135,7 +136,7 @@ public class SparkUpdateOrcidAuthors { .col("authorData.oid") .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), "full_outer") - .map(value -> { + .map((MapFunction, AuthorSummary>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -183,7 +184,8 @@ public class SparkUpdateOrcidAuthors { .groupBy("authorData.oid") .agg(array_max(collect_list("downloadDate"))) .map( - row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()), + (MapFunction>) row -> new 
Tuple2<>(row.get(0).toString(), + row.get(1).toString()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .toJavaRDD() .collect(); @@ -214,25 +216,24 @@ public class SparkUpdateOrcidAuthors { .dropDuplicates("downloadDate", "authorData"); cleanedDS .toJavaRDD() - .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); long cleanedDSCount = cleanedDS.count(); - logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); - logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); - logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); - logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); - logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); - logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - logger.info("report_merged_count: " + mergedCount); - logger.info("report_cleaned_count: " + cleanedDSCount); + logger.info("report_oldAuthorsFoundAcc: {}", oldAuthorsFoundAcc.value()); + logger.info("report_newAuthorsFoundAcc: {}", newAuthorsFoundAcc.value()); + logger.info("report_updatedAuthorsFoundAcc: {}", updatedAuthorsFoundAcc.value()); + logger.info("report_errorCodeFoundAcc: {}", errorCodeAuthorsFoundAcc.value()); + logger.info("report_errorLoadingJsonFoundAcc: {}", errorLoadingAuthorsJsonFoundAcc.value()); + logger.info("report_errorParsingXMLFoundAcc: {}", errorParsingAuthorsXMLFoundAcc.value()); + logger.info("report_merged_count: {}", mergedCount); + logger.info("report_cleaned_count: {}", cleanedDSCount); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = 
jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java index fa17e97e3..aad202ff6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java @@ -3,17 +3,16 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -26,20 +25,19 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; import scala.Tuple2; public class SparkUpdateOrcidDatasets { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() 
.setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +51,6 @@ public class SparkUpdateOrcidDatasets { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,25 +59,6 @@ public class SparkUpdateOrcidDatasets { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator oldAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("old_authors_found"); - LongAccumulator updatedAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("updated_authors_found"); - LongAccumulator newAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("new_authors_found"); - LongAccumulator errorCodeAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("error_code_authors_found"); - LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_authors_json_found"); - LongAccumulator errorParsingAuthorsXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_parsing_authors_xml_found"); - LongAccumulator oldWorksFoundAcc = spark .sparkContext() .longAccumulator("old_works_found"); @@ -100,125 +78,11 @@ public class SparkUpdateOrcidDatasets { .sparkContext() .longAccumulator("error_parsing_works_xml_found"); -// JavaPairRDD xmlSummariesRDD = sc -// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); -// xmlSummariesRDD -// .map(seq -> { -// AuthorSummary authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(seq._2().toString().getBytes()); -// authorSummary -// 
.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return authorSummary; -// }) -// .filter(authorSummary -> authorSummary != null) -// .map(authorSummary -> JsonWriter.create(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); -// -// JavaPairRDD xmlWorksRDD = sc -// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); -// -// xmlWorksRDD -// .map(seq -> { -// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); -// Work work = new Work(); -// work.setWorkDetail(workDetail); -// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return work; -// }) -// .filter(work -> work != null) -// .map(work -> JsonWriter.create(work)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); - -// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { -// AuthorSummary authorSummary = new AuthorSummary(); -// String orcidId = data._1().toString(); -// String jsonData = data._2().toString(); -// JsonElement jElement = new JsonParser().parse(jsonData); -// String statusCode = getJsonValue(jElement, "statusCode"); -// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); -// if (statusCode.equals("200")) { -// String compressedData = getJsonValue(jElement, "compressedData"); -// if (StringUtils.isEmpty(compressedData)) { -// errorLoadingAuthorsJsonFoundAcc.add(1); -// } else { -// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); -// try { -// authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(xmlAuthor.getBytes()); -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// authorSummary.setBase64CompressData(compressedData); -// return authorSummary; -// } catch (Exception e) { -// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); 
-// errorParsingAuthorsXMLFoundAcc.add(1); -// } -// } -// } else { -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// errorCodeAuthorsFoundAcc.add(1); -// } -// return authorSummary; -// }; -// -// Dataset downloadedAuthorSummaryDS = spark -// .createDataset( -// sc -// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) -// .map(retrieveAuthorSummaryFunction) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// Dataset currentAuthorSummaryDS = spark -// .createDataset( -// sc -// .textFile(workingPath.concat("orcid_dataset/authors/*")) -// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// currentAuthorSummaryDS -// .joinWith( -// downloadedAuthorSummaryDS, -// currentAuthorSummaryDS -// .col("authorData.oid") -// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), -// "full_outer") -// .map(value -> { -// Optional opCurrent = Optional.ofNullable(value._1()); -// Optional opDownloaded = Optional.ofNullable(value._2()); -// if (!opCurrent.isPresent()) { -// newAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// if (!opDownloaded.isPresent()) { -// oldAuthorsFoundAcc.add(1); -// return opCurrent.get(); -// } -// if (opCurrent.isPresent() && opDownloaded.isPresent()) { -// updatedAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// return null; -// }, -// Encoders.bean(AuthorSummary.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); -// -// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); -// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); -// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); -// 
logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); -// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); -// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - - Function retrieveWorkFunction = jsonData -> { + final Function retrieveWorkFunction = jsonData -> { Work work = new Work(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate("2020-11-18 00:00:05.644768"); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -247,9 +111,7 @@ public class SparkUpdateOrcidDatasets { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -271,7 +133,7 @@ public class SparkUpdateOrcidDatasets { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -291,23 +153,22 @@ public class SparkUpdateOrcidDatasets { Encoders.bean(Work.class)) .filter(Objects::nonNull) .toJavaRDD() - .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: 
" + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java index 5ebbc01ed..64523941d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -29,14 +30,16 @@ import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import 
eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; public class SparkUpdateOrcidWorks { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -83,7 +86,6 @@ public class SparkUpdateOrcidWorks { JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate(Long.toString(System.currentTimeMillis())); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -112,9 +114,7 @@ public class SparkUpdateOrcidWorks { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -136,7 +136,7 @@ public class SparkUpdateOrcidWorks { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -159,12 +159,12 @@ public class SparkUpdateOrcidWorks { .map(work -> OBJECT_MAPPER.writeValueAsString(work)) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + 
newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); String lastModifiedDateFromLambdaFile = HDFSUtil .readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt"); @@ -175,8 +175,7 @@ public class SparkUpdateOrcidWorks { private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index c85b5b691..af6def227 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -20,6 +20,9 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; 
+import com.ximpleware.ParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -28,22 +31,23 @@ public class SummariesDecompressor { private static final int MAX_XML_RECORDS_PARSED = -1; - public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) - throws Exception { - String uri = inputUri; - FileSystem fs = FileSystem.get(URI.create(uri), conf); - Path inputPath = new Path(uri); + private SummariesDecompressor() { + } + + public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws IOException { + FileSystem fs = FileSystem.get(URI.create(inputUri), conf); + Path inputPath = new Path(inputUri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { - System.err.println("No codec found for " + uri); + System.err.println("No codec found for " + inputUri); System.exit(1); } - CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + CompressionCodecFactory.removeSuffix(inputUri, codec.getDefaultExtension()); InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarSummaries(fs, conf, gzipInputStream, outputPath); + parseTarSummaries(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,7 +56,7 @@ public class SummariesDecompressor { } private static void parseTarSummaries( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int nameFound = 0; int surnameFound = 0; @@ -163,7 +167,7 @@ public class SummariesDecompressor { } public static void extractXML(Configuration conf, String inputUri, Path outputPath) - throws 
Exception { + throws IOException, VtdException, ParseException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index a2342f7b4..9eb73b240 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; public class JsonHelper { + private JsonHelper() { + } + public static String createOidWork(WorkDetail workData) { return new Gson().toJson(workData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java index e1a913476..bbd2e1f7e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java @@ -2,10 +2,8 @@ package eu.dnetlib.doiboost.orcid.util; import java.io.*; -import java.net.URI; import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,42 +12,39 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - -import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors; - public class HDFSUtil { static Logger logger = LoggerFactory.getLogger(HDFSUtil.class); + private HDFSUtil() { + } + private static FileSystem getFileSystem(String hdfsServerUri) throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", 
hdfsServerUri); - FileSystem fileSystem = FileSystem.get(conf); - return fileSystem; + return FileSystem.get(conf); } public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException { FileSystem fileSystem = getFileSystem(hdfsServerUri); Path toReadPath = new Path(workingPath.concat(path)); if (!fileSystem.exists(toReadPath)) { - throw new RuntimeException("File not exist: " + path); + throw new IOException("File not exist: " + path); } - logger.info("Last_update_path " + toReadPath); + logger.info("Last_update_path {}", toReadPath); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); - BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); - StringBuffer sb = new StringBuffer(); - try { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { + StringBuilder sb = new StringBuilder(); + String line; while ((line = br.readLine()) != null) { sb.append(line); } - } finally { - br.close(); + + String buffer = sb.toString(); + logger.info("Last_update: {}", buffer); + return buffer; } - String buffer = sb.toString(); - logger.info("Last_update: " + buffer); - return buffer; } public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text) @@ -60,8 +55,8 @@ public class HDFSUtil { fileSystem.delete(toWritePath, true); } FSDataOutputStream os = fileSystem.create(toWritePath); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); - br.write(text); - br.close(); + try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8))) { + br.write(text); + } } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 52e076105..e8745aa96 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,6 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.io.IOException; import java.util.*; import org.apache.commons.lang3.StringUtils; @@ -38,6 +37,9 @@ public class XMLRecordParser { private static final String NS_ERROR = "error"; + private XMLRecordParser() { + } + public static AuthorData VTDParseAuthorData(byte[] bytes) throws VtdException, ParseException { final VTDGen vg = new VTDGen(); @@ -90,46 +92,6 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } -// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); -// if (StringUtils.isNoneBlank(creationMethod)) { -// authorData.setCreationMethod(creationMethod); -// } -// -// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); -// if (StringUtils.isNoneBlank(completionDate)) { -// authorData.setCompletionDate(completionDate); -// } -// -// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); -// if (StringUtils.isNoneBlank(submissionDate)) { -// authorData.setSubmissionDate(submissionDate); -// } -// -// final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); -// if (StringUtils.isNoneBlank(claimed)) { -// authorData.setClaimed(Boolean.parseBoolean(claimed)); -// } -// -// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); -// if (StringUtils.isNoneBlank(verifiedEmail)) { -// authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); -// } -// -// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); -// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { -// 
authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); -// } -// -// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); -// if (StringUtils.isNoneBlank(deactivationDate)) { -// authorData.setDeactivationDate(deactivationDate); -// } -// -// final String lastModifiedDate = VtdUtilityParser -// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); -// if (StringUtils.isNoneBlank(lastModifiedDate)) { -// authorData.setLastModifiedDate(lastModifiedDate); -// } return authorData; } @@ -207,7 +169,7 @@ public class XMLRecordParser { } public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) - throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + throws ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -251,15 +213,15 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); - AuthorData authorData = retrieveAuthorData(ap, vn, bytes); - AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorData authorData = retrieveAuthorData(ap, vn); + AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn); AuthorSummary authorSummary = new AuthorSummary(); authorSummary.setAuthorData(authorData); authorSummary.setAuthorHistory(authorHistory); return authorSummary; } - private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn) throws VtdException { AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -300,7 +262,7 @@ public class XMLRecordParser { return authorData; } - private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + private static 
AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn) throws VtdException { AuthorHistory authorHistory = new AuthorHistory(); final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 124a1b9ef..ddbf71bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -33,6 +33,9 @@ public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private ActivitiesDumpReader() { + } + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { String uri = inputUri; @@ -48,7 +51,7 @@ public class ActivitiesDumpReader { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -56,8 +59,7 @@ public class ActivitiesDumpReader { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int noDoiFound = 0; int errorFromOrcidFound = 0; @@ -73,7 +75,7 @@ public class ActivitiesDumpReader { SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new 
StringBuilder(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,12 +85,12 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - buffer = new StringBuffer(); + builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } WorkDetail workDetail = XMLRecordParserNoDoi - .VTDParseWorkData(buffer.toString().getBytes()); + .VTDParseWorkData(builder.toString().getBytes()); if (workDetail != null) { if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -123,7 +125,7 @@ public class ActivitiesDumpReader { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 4a64124d1..5c23a33a8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; @@ -16,7 +16,6 @@ import eu.dnetlib.doiboost.orcid.OrcidDSManager; * This job generates one sequence file, the key is an orcid identifier and the * value is an orcid publication in json format */ - public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; @@ -30,13 +29,12 @@ public class GenOrcidAuthorWork 
extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 1d47808ef..db3b14923 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -14,21 +14,16 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; @@ -75,7 +70,7 @@ public class SparkGenEnrichedOrcidWorks { spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -86,10 +81,10 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) .filter(authorSummary -> authorSummary.getAuthorData() != null) - .map(authorSummary -> authorSummary.getAuthorData()) + .map(AuthorSummary::getAuthorData) .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + authorDataset.count()); + logger.info("Authors data loaded: {}", authorDataset.count()); Dataset workDataset = spark .createDataset( @@ -97,7 +92,7 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) .filter(work -> work.getWorkDetail() != null) - .map(work -> work.getWorkDetail()) + .map(Work::getWorkDetail) .filter(work -> work.getErrorCode() == null) .filter( work -> work @@ -107,7 +102,7 @@ public class SparkGenEnrichedOrcidWorks { .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) .rdd(), Encoders.bean(WorkDetail.class)); - logger.info("Works data loaded: " + workDataset.count()); + logger.info("Works data loaded: {}", workDataset.count()); final LongAccumulator 
warnNotFoundContributors = spark .sparkContext() @@ -122,7 +117,7 @@ public class SparkGenEnrichedOrcidWorks { WorkDetail w = value._1; AuthorData a = value._2; if (w.getContributors() == null - || (w.getContributors() != null && w.getContributors().size() == 0)) { + || (w.getContributors() != null && w.getContributors().isEmpty())) { Contributor c = new Contributor(); c.setName(a.getName()); c.setSurname(a.getSurname()); @@ -163,7 +158,6 @@ public class SparkGenEnrichedOrcidWorks { final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, - errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, @@ -172,13 +166,10 @@ public class SparkGenEnrichedOrcidWorks { titleNotProvidedAcc, noUrlAcc, dateOfCollection); + JavaRDD oafPublicationRDD = enrichedWorksRDD - .map( - e -> { - return (Publication) publicationToOaf - .generatePublicationActionsFromJson(e._2()); - }) - .filter(p -> p != null); + .map(e -> (Publication) publicationToOaf.generatePublicationActionsFromJson(e._2())) + .filter(Objects::nonNull); sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); @@ -186,7 +177,7 @@ public class SparkGenEnrichedOrcidWorks { .mapToPair( p -> new Tuple2<>(p.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( outputEnrichedWorksPath, Text.class, @@ -194,17 +185,17 @@ public class SparkGenEnrichedOrcidWorks { SequenceFileOutputFormat.class, sc.hadoopConfiguration()); - logger.info("parsedPublications: " + parsedPublications.value().toString()); - logger.info("enrichedPublications: " + enrichedPublications.value().toString()); - logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); - logger.info("errorsGeneric: " + 
errorsGeneric.value().toString()); - logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); - logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); - logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); - logger.info("otherTypeFound: " + otherTypeFound.value().toString()); - logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); - logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); - logger.info("noUrlAcc: " + noUrlAcc.value().toString()); + logger.info("parsedPublications: {}", parsedPublications.value()); + logger.info("enrichedPublications: {}", enrichedPublications.value()); + logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value()); + logger.info("errorsGeneric: {}", errorsGeneric.value()); + logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value()); + logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value()); + logger.info("errorsInvalidType: {}", errorsInvalidType.value()); + logger.info("otherTypeFound: {}", otherTypeFound.value()); + logger.info("deactivatedAcc: {}", deactivatedAcc.value()); + logger.info("titleNotProvidedAcc: {}", titleNotProvidedAcc.value()); + logger.info("noUrlAcc: {}", noUrlAcc.value()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 23e9dd884..33f3b3bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -18,6 +18,9 @@ public class JsonWriter { public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); + private JsonWriter() { + } + public static String 
create(AuthorData authorData) throws JsonProcessingException { return OBJECT_MAPPER.writeValueAsString(authorData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 215753899..f92040c24 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -26,21 +27,19 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ - public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public final static String orcidPREFIX = "orcid_______"; + public static final String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; + public static final String SEPARATOR = IdentifierFactory.ID_SEPARATOR; public static final String DEACTIVATED_NAME = "Given Names Deactivated"; public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; - private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final 
LongAccumulator errorsInvalidType; @@ -52,7 +51,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf( LongAccumulator parsedPublications, LongAccumulator enrichedPublications, - LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, @@ -63,7 +61,6 @@ public class PublicationToOaf implements Serializable { String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; - this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; @@ -77,7 +74,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf() { this.parsedPublications = null; this.enrichedPublications = null; - this.errorsGeneric = null; this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; @@ -88,19 +84,8 @@ public class PublicationToOaf implements Serializable { this.dateOfCollection = null; } - private static final Map> datasources = new HashMap>() { - - { - put( - ModelConstants.ORCID, - new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); - - } - }; - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname private static final Map> externalIds = new HashMap>() { - { put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); @@ -208,10 +193,8 @@ public class PublicationToOaf implements Serializable { .setTitle( titles .stream() - .map(t -> { - return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); - }) - .filter(s -> s != null) + .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); // Adding identifier final String id 
= getStringValue(rootElement, "id"); @@ -226,7 +209,7 @@ public class PublicationToOaf implements Serializable { publication.setId(sourceId); // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + settingRelevantDate(rootElement, publication, "issued", true); // Adding collectedfrom publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); @@ -243,7 +226,7 @@ public class PublicationToOaf implements Serializable { Map publicationType = typologiesMapping.get(type); if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { errorsInvalidType.add(1); - logger.error("publication_type_not_found: " + type); + logger.error("publication_type_not_found: {}", type); return null; } @@ -307,7 +290,7 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { + if (authors != null && !authors.isEmpty()) { if (authors.stream().filter(a -> { return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); @@ -322,8 +305,7 @@ public class PublicationToOaf implements Serializable { } else { if (authors == null) { Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(rootElement); - throw new RuntimeException("not_valid_authors: " + json); + throw new RuntimeException("not_valid_authors: " + gson.toJson(rootElement)); } else { if (errorsNotFoundAuthors != null) { errorsNotFoundAuthors.add(1); @@ -434,7 +416,6 @@ public class PublicationToOaf implements Serializable { private void settingRelevantDate(final JsonObject rootElement, final Publication publication, - final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) { @@ -450,10 +431,8 @@ public class PublicationToOaf implements Serializable { Arrays 
.asList(pubDate) .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .filter(s -> s != null) + .map(r -> mapStructuredProperty(r, q, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); } } @@ -498,7 +477,7 @@ public class PublicationToOaf implements Serializable { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { - logger.error("unknowntype_" + type); + logger.error("unknowntype_{}", type); if (errorsInvalidType != null) { errorsInvalidType.add(1); } @@ -550,7 +529,7 @@ public class PublicationToOaf implements Serializable { } private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { + if (value == null || StringUtils.isBlank(value)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index fff753ff3..e69b496b7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -1,26 +1,14 @@ package eu.dnetlib.doiboost.orcidnodoi.similarity; -import java.io.IOException; import java.text.Normalizer; import java.util.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; - -import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import 
eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -29,18 +17,16 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; * the match is found (if exist) author informations are used to enrich the * matched contribuotr inside contributors list */ - public class AuthorMatcher { - private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - public static final Double threshold = 0.8; + public static final Double THRESHOLD = 0.8; - public static void match(AuthorData author, List contributors) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + private AuthorMatcher() { + } + public static void match(AuthorData author, List contributors) { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; contributors .stream() .filter(c -> !StringUtils.isBlank(c.getCreditName())) @@ -62,8 +48,8 @@ public class AuthorMatcher { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -73,14 +59,14 @@ public class AuthorMatcher { } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() - .filter(c -> c.isSimpleMatch()) + .filter(Contributor::isSimpleMatch) .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + 
.max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -92,7 +78,7 @@ public class AuthorMatcher { } public static boolean simpleMatchOnOtherNames(String name, List otherNames) { - if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + if (otherNames == null || otherNames.isEmpty()) { return false; } return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; @@ -132,8 +118,7 @@ public class AuthorMatcher { } public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - return score; + return similarityJaroWinkler(nameA, surnameA, nameB, surnameB); } private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { @@ -179,7 +164,7 @@ public class AuthorMatcher { public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() - .filter(c -> c.isBestMatch()) + .filter(Contributor::isBestMatch) .forEach(c -> { c.setName(author.getName()); c.setSurname(author.getSurname()); @@ -206,9 +191,4 @@ public class AuthorMatcher { } } - private static String toJson(WorkDetail work) { - GsonBuilder builder = new GsonBuilder(); - Gson gson = builder.create(); - return gson.toJson(work); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 29791bbbd..b4c12eed3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -5,9 +5,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; 
-import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; @@ -20,21 +17,10 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser */ - public class XMLRecordParserNoDoi { - private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; private static final String NS_COMMON = "common"; - private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; - private static final String NS_PERSON = "person"; - private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; - private static final String NS_DETAILS = "personal-details"; - private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; - private static final String NS_OTHER = "other-name"; - private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; - private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; private static final String NS_WORK = "work"; @@ -42,6 +28,9 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; + private XMLRecordParserNoDoi() { + } + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, ParseException, XPathParseException, NavException, XPathEvalException { @@ -100,16 +89,16 @@ public class XMLRecordParserNoDoi { workData.setUrls(urls); } - workData.setPublicationDates(getPublicationDates(vg, vn, ap)); - workData.setExtIds(getExternalIds(vg, vn, ap)); - workData.setContributors(getContributors(vg, vn, ap)); + workData.setPublicationDates(getPublicationDates(vn, ap)); + workData.setExtIds(getExternalIds(vn, ap)); + workData.setContributors(getContributors(vn, ap)); return workData; } - private static List getPublicationDates(VTDGen vg, VTDNav vn, 
AutoPilot ap) + private static List getPublicationDates(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List publicationDates = new ArrayList(); + List publicationDates = new ArrayList<>(); int yearIndex = 0; ap.selectXPath("//common:publication-date/common:year"); while (ap.evalXPath() != -1) { @@ -142,9 +131,9 @@ public class XMLRecordParserNoDoi { return publicationDates; } - private static List getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getExternalIds(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List extIds = new ArrayList(); + List extIds = new ArrayList<>(); int typeIndex = 0; ap.selectXPath("//common:external-id/common:external-id-type"); while (ap.evalXPath() != -1) { @@ -177,12 +166,12 @@ public class XMLRecordParserNoDoi { if (typeIndex == valueIndex) { return extIds; } - return new ArrayList(); + return new ArrayList<>(); } - private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getContributors(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List contributors = new ArrayList(); + List contributors = new ArrayList<>(); ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 2b241ed5f..9ea7c6959 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.*; @@ -57,7 +58,7 @@ 
public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { + void downloadTest() throws Exception { final String orcid = "0000-0001-7291-3210"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml"); @@ -159,18 +160,19 @@ public class OrcidClientTest { } @Test - private void testReadBase64CompressedRecord() throws Exception { + @Disabled + void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertTrue(recordFromSeqFile.equals(downloadedRecord)); + assertEquals(recordFromSeqFile, downloadedRecord); } @Test @Disabled - public void lambdaFileReaderTest() throws Exception { + void lambdaFileReaderTest() throws Exception { String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); @@ -187,7 +189,7 @@ public class OrcidClientTest { while ((line = br.readLine()) != null) { String[] values = line.split(","); List recordInfo = Arrays.asList(values); - assertTrue(recordInfo.size() == 4); + assertEquals(4, recordInfo.size()); String orcid = recordInfo.get(0); String modifiedDate = recordInfo.get(3); rowNum++; @@ -264,7 +266,7 @@ public class OrcidClientTest { } @Test - public void downloadWorkTest() throws Exception { + void downloadWorkTest() throws Exception { String orcid = "0000-0003-0015-1952"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = 
"/tmp/downloaded_work_".concat(orcid).concat(".xml"); @@ -274,7 +276,7 @@ public class OrcidClientTest { } @Test - public void downloadRecordTest() throws Exception { + void downloadRecordTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); @@ -284,7 +286,7 @@ public class OrcidClientTest { } @Test - public void downloadWorksTest() throws Exception { + void downloadWorksTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); @@ -294,7 +296,7 @@ public class OrcidClientTest { } @Test - public void downloadSingleWorkTest() throws Exception { + void downloadSingleWorkTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); @@ -304,7 +306,7 @@ public class OrcidClientTest { } @Test - public void cleanAuthorListTest() throws Exception { + void cleanAuthorListTest() throws Exception { AuthorData a1 = new AuthorData(); a1.setOid("1"); a1.setName("n1"); @@ -333,7 +335,7 @@ public class OrcidClientTest { @Test @Ignore - public void testUpdatedRecord() throws Exception { + void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -342,7 +344,7 @@ public class OrcidClientTest { @Test @Ignore - private void testUpdatedWork() throws Exception { + void testUpdatedWork() throws Exception { final String base64CompressedWork = 
"H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java 
b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 235db52d4..78760fa96 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { - private static final String NS_WORK = "work"; - private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; - private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static Path testPath; @@ -38,12 +35,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getName()); @@ -54,12 +49,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getErrorCode()); @@ -67,14 +60,12 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + void testOrcidWorkDataXMLParser() throws Exception { String xml 
= IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); - XMLRecordParser p = new XMLRecordParser(); - WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes()); assertNotNull(workData); assertNotNull(workData.getOid()); @@ -83,7 +74,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidOtherNamesXMLParser() throws Exception { + void testOrcidOtherNamesXMLParser() throws Exception { String xml = IOUtils .toString( @@ -91,30 +82,13 @@ public class XMLRecordParserTest { AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); - assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + assertEquals("Andrew C. Porteus", authorData.getOtherNames().get(0)); String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } -// @Test -// private void testWorkIdLastModifiedDateXMLParser() throws Exception { -// String xml = IOUtils -// .toString( -// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); -// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); -// workIdLastModifiedDate.forEach((k, v) -> { -// try { -// OrcidClientTest -// .logToFile( -// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " -// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); -// } catch (IOException e) { -// } -// }); -// } - @Test - public void testAuthorSummaryXMLParser() throws Exception { + void testAuthorSummaryXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); @@ -124,7 +98,7 @@ public class XMLRecordParserTest { } @Test - public void testWorkDataXMLParser() throws Exception { + void testWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( 
this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 01e26dcb4..54c16b5d7 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static org.junit.jupiter.api.Assertions.*; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,36 +12,36 @@ import org.slf4j.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import jdk.nashorn.internal.ir.annotations.Ignore; -public class PublicationToOafTest { +class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test - @Ignore - private void convertOafPublicationTest() throws Exception { + @Disabled + void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); - logger.info("json publication loaded: " + j.toString()); + logger.info("json publication loaded: {}", j.toString()); PublicationToOaf publicationToOaf = new PublicationToOaf(); Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); - 
assertEquals(oafPublication.getOriginalId().get(0), "60153327"); - logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals("60153327", oafPublication.getOriginalId().get(0)); + logger.info("oafPublication.getId(): {}", oafPublication.getId()); assertEquals( - oafPublication.getTitle().get(0).getValue(), - "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp", + oafPublication.getTitle().get(0).getValue()); assertNotNull(oafPublication.getLastupdatetimestamp()); assertNotNull(oafPublication.getDateofcollection()); assertNotNull(oafPublication.getDateoftransformation()); - assertTrue(oafPublication.getAuthor().size() == 7); + assertEquals(7, oafPublication.getAuthor().size()); oafPublication.getAuthor().forEach(a -> { assertNotNull(a.getFullname()); assertNotNull(a.getRank()); @@ -64,15 +65,15 @@ public class PublicationToOafTest { if (oafPublication.getExternalReference() != null) { oafPublication.getExternalReference().forEach(e -> { assertNotNull(e.getRefidentifier()); - assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + assertEquals(ModelConstants.DNET_PID_TYPES, e.getQualifier().getSchemeid()); }); } assertNotNull(oafPublication.getInstance()); oafPublication.getInstance().forEach(i -> { assertNotNull(i.getInstancetype().getClassid()); - logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid()); assertNotNull(i.getInstancetype().getClassname()); - logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname()); }); } } diff --git 
a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 54c2d6217..99ec656d5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.util.*; @@ -25,7 +24,7 @@ import eu.dnetlib.dhp.schema.orcid.Contributor; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -public class OrcidNoDoiTest { +class OrcidNoDoiTest { private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); @@ -34,7 +33,7 @@ public class OrcidNoDoiTest { static String orcidIdA = "0000-0003-2760-1191"; @Test - public void readPublicationFieldsTest() + void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils @@ -44,10 +43,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -87,7 +82,7 @@ public class OrcidNoDoiTest { } @Test - public void authorDoubleMatchTest() throws Exception { + void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = 
"activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -98,71 +93,49 @@ public class OrcidNoDoiTest { .toString( OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + assertEquals("Abdel-Dayem K", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 6); + assertEquals(6, workData.getContributors().size()); } @Test - public void readContributorsTest() + void readContributorsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); 
assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); - assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); - assertTrue(workData.getContributors().get(0).getRole().equals("role0")); - assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertEquals("seq0", workData.getContributors().get(0).getSequence()); + assertEquals("role0", workData.getContributors().get(0).getRole()); + assertEquals("creditname1", workData.getContributors().get(1).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); - assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); - assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertEquals("creditname2", workData.getContributors().get(2).getCreditName()); + assertEquals("seq2", workData.getContributors().get(2).getSequence()); assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); - assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertEquals("creditname3", workData.getContributors().get(3).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); - assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertEquals("role3", workData.getContributors().get(3).getRole()); assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); - assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); - assertTrue(workData.getContributors().get(4).getRole().equals("role4")); + assertEquals("seq4", workData.getContributors().get(4).getSequence()); + assertEquals("role4", workData.getContributors().get(4).getRole()); } @Test - public void authorSimpleMatchTest() throws Exception { + void authorSimpleMatchTest() throws Exception { 
String orcidWork = "activity_work_0000-0002-5982-8983.xml"; AuthorData author = new AuthorData(); author.setName("Parkhouse"); @@ -175,10 +148,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -188,20 +157,21 @@ public class OrcidNoDoiTest { assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Parkhouse, H.")); + assertEquals("Parkhouse, H.", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 2); + assertEquals(2, workData.getContributors().size()); Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0002-5982-8983")); - assertTrue(c.getName().equals("Parkhouse")); - assertTrue(c.getSurname().equals("H.")); - assertTrue(c.getCreditName().equals("Parkhouse, H.")); + + assertEquals("0000-0002-5982-8983", c.getOid()); + assertEquals("Parkhouse", c.getName()); + assertEquals("H.", c.getSurname()); + assertEquals("Parkhouse, H.", c.getCreditName()); } @Test - public void match() { + void match() { AuthorData author = new AuthorData(); author.setName("Joe"); @@ -210,7 +180,6 @@ public class OrcidNoDoiTest { Contributor contributor = new Contributor(); contributor.setCreditName("Joe Dodge"); List contributors = Arrays.asList(contributor); - AuthorMatcher am = new AuthorMatcher(); int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); contributors @@ -225,12 +194,13 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters.get(0) == 1); + assertEquals(1, matchCounters.get(0)); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author); - assertTrue(contributors.get(0).getName().equals("Joe")); - 
assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("Joe Dodge", contributors.get(0).getCreditName()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); AuthorData authorX = new AuthorData(); authorX.setName(nameA); @@ -259,7 +229,7 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters2.get(0) == 2); + assertEquals(2, matchCounters2.get(0)); assertTrue(contributorList.get(0).isSimpleMatch()); assertTrue(contributorList.get(1).isSimpleMatch()); @@ -271,7 +241,7 @@ public class OrcidNoDoiTest { c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .filter(c -> c.getScore() >= AuthorMatcher.THRESHOLD) .max(Comparator.comparing(c -> c.getScore())); assertTrue(optCon.isPresent()); @@ -281,15 +251,16 @@ public class OrcidNoDoiTest { assertTrue(contributorList.get(0).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch()); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX); - assertTrue(contributorList.get(0).getName().equals(nameA)); - assertTrue(contributorList.get(0).getSurname().equals(surnameA)); - assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); - assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + + assertEquals(nameA, contributorList.get(0).getName()); + assertEquals(surnameA, contributorList.get(0).getSurname()); + assertEquals("Abdel-Dayem Khai", contributorList.get(0).getCreditName()); + assertEquals(orcidIdA, contributorList.get(0).getOid()); assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); } @Test - public void authorBestMatchTest() throws Exception { 
+ void authorBestMatchTest() throws Exception { String name = "Khairy"; String surname = "Abdel Dayem"; String orcidWork = "activity_work_0000-0003-2760-1191.xml"; @@ -304,10 +275,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -315,16 +282,17 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); List c = workData.getContributors(); - assertTrue(c.get(0).getName().equals(name)); - assertTrue(c.get(0).getSurname().equals(surname)); - assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); - assertTrue(c.get(0).getOid().equals(orcidIdA)); + + assertEquals(name, c.get(0).getName()); + assertEquals(surname, c.get(0).getSurname()); + assertEquals("Khair Abde Daye", c.get(0).getCreditName()); + assertEquals(orcidIdA, c.get(0).getOid()); } @Test - public void otherNamesMatchTest() + void otherNamesMatchTest() throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { AuthorData author = new AuthorData(); @@ -341,8 +309,9 @@ public class OrcidNoDoiTest { contributor.setCreditName("XY"); List contributors = Arrays.asList(contributor); AuthorMatcher.match(author, contributors); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); } } diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 692605b03..0b4a80b2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -14,12 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PropagationConstant { + + private PropagationConstant() { + } + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index 0f45d3beb..8e76b5778 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,16 +5,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.gson.Gson; /** Created by miriam on 01/08/2018. 
*/ public class Community implements Serializable { - private static final Log log = LogFactory.getLog(Community.class); - private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 844fe2962..5d92f5ab6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -2,15 +2,9 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; @@ -22,8 +16,6 @@ import eu.dnetlib.dhp.bulktag.criteria.Selection; /** Created by miriam on 02/08/2018. 
*/ public class CommunityConfiguration implements Serializable { - private static final Log log = LogFactory.getLog(CommunityConfiguration.class); - private Map communities; // map subject -> communityid @@ -136,7 +128,7 @@ public class CommunityConfiguration implements Serializable { else return null; }) - .filter(st -> (st != null)) + .filter(Objects::nonNull) .collect(Collectors.toList()); } @@ -161,7 +153,7 @@ public class CommunityConfiguration implements Serializable { private List getContextIds(List> list) { if (list != null) { - return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + return list.stream().map(Pair::getFst).collect(Collectors.toList()); } return Lists.newArrayList(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 749ed292f..822d35078 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -13,6 +13,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,11 +32,16 @@ public class CommunityConfigurationFactory { private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration newInstance(final String xml) throws DocumentException { + private CommunityConfigurationFactory() { + } + + public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { log.debug(String.format("parsing community configuration from:\n%s", xml)); - final Document doc = new SAXReader().read(new 
StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Map communities = Maps.newHashMap(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index e0856ae8f..9002e718f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -13,9 +13,6 @@ public class Constraint implements Serializable { private String value; private Selection selection; - public Constraint() { - } - public String getVerb() { return verb; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index b56dfaaa3..0f6fab238 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -19,11 +19,8 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 02/08/2018. 
*/ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); - // private ConstraintEncapsulator ce; - private List constraint; - public Constraints() { - } + private List constraint; public List getConstraint() { return constraint; @@ -44,13 +41,8 @@ public class Constraints implements Serializable { try { st.setSelection(resolver); - } catch (NoSuchMethodException e) { - log.error(e.getMessage()); - } catch (IllegalAccessException e) { - log.error(e.getMessage()); - } catch (InvocationTargetException e) { - log.error(e.getMessage()); - } catch (InstantiationException e) { + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException + | InstantiationException e) { log.error(e.getMessage()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index a9427b594..cb198dc43 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -19,10 +19,6 @@ public class Provider implements Serializable { private SelectionConstraints selectionConstraints; - public SelectionConstraints getSelCriteria() { - return selectionConstraints; - } - public SelectionConstraints getSelectionConstraints() { return selectionConstraints; } @@ -31,10 +27,6 @@ public class Provider implements Serializable { this.selectionConstraints = selectionConstraints; } - public void setSelCriteria(SelectionConstraints selCriteria) { - this.selectionConstraints = selCriteria; - } - public String getOpenaireId() { return openaireId; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 993a7ef77..89cf5beff 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.util.List; import org.dom4j.DocumentException; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -63,7 +64,7 @@ public class QueryInformationSystem { + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); final List res = isLookUp.quickSearchProfile(XQUERY); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c9ff26963..c8b1bc8fe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -43,7 +43,6 @@ public class ResultTagger implements Serializable { param.put(key, jsonContext.read(params.get(key))); } catch (com.jayway.jsonpath.PathNotFoundException e) { param.put(key, new ArrayList<>()); - // throw e; } } return param; @@ -52,9 +51,6 @@ public class ResultTagger implements Serializable { public R enrichContextCriteria( final R result, final CommunityConfiguration conf, final Map criteria) { - // } - // public Result enrichContextCriteria(final Result result, final CommunityConfiguration - // conf, final Map criteria) { final Map> param = getParamMap(result, criteria); // Verify if the 
entity is deletedbyinference. In case verify if to clean the context list @@ -74,7 +70,7 @@ public class ResultTagger implements Serializable { result .getSubject() .stream() - .map(subject -> subject.getValue()) + .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .map(String::toLowerCase) .map(String::trim) @@ -90,15 +86,11 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { - if (Objects.nonNull(i.getCollectedfrom())) { - if (Objects.nonNull(i.getCollectedfrom().getKey())) { - tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); - } + if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { + tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); } - if (Objects.nonNull(i.getHostedby())) { - if (Objects.nonNull(i.getHostedby().getKey())) { - tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); - } + if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { + tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); } } @@ -149,52 +141,46 @@ public class ResultTagger implements Serializable { return result; } - result - .getContext() - .stream() - .map( - c -> { - if (communities.contains(c.getId())) { - Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); - List dataInfoList; - if (opt_dataInfoList.isPresent()) - dataInfoList = opt_dataInfoList.get(); - else { - dataInfoList = new ArrayList<>(); - c.setDataInfo(dataInfoList); - } - if (subjects.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_SUBJECT, - CLASS_NAME_BULKTAG_SUBJECT, - TAGGING_TRUST)); - if (datasources.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_DATASOURCE, - CLASS_NAME_BULKTAG_DATASOURCE, - TAGGING_TRUST)); - if (czenodo.contains(c.getId())) - 
dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_CZENODO, - CLASS_NAME_BULKTAG_ZENODO, - TAGGING_TRUST)); - } - return c; - }) - .collect(Collectors.toList()); + result.getContext().forEach(c -> { + if (communities.contains(c.getId())) { + Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); + List dataInfoList; + if (opt_dataInfoList.isPresent()) + dataInfoList = opt_dataInfoList.get(); + else { + dataInfoList = new ArrayList<>(); + c.setDataInfo(dataInfoList); + } + if (subjects.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_SUBJECT, + CLASS_NAME_BULKTAG_SUBJECT, + TAGGING_TRUST)); + if (datasources.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_DATASOURCE, + CLASS_NAME_BULKTAG_DATASOURCE, + TAGGING_TRUST)); + if (czenodo.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_CZENODO, + CLASS_NAME_BULKTAG_ZENODO, + TAGGING_TRUST)); + } + }); communities .removeAll( - result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet())); + result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) return result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 71ff61d1b..c7dcce812 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -15,9 +15,6 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; public class SelectionConstraints implements Serializable { private List criteria; - public SelectionConstraints() { - } - public List getCriteria() { return criteria; } diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 80d98bb1a..8274e26c9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.community; public class TaggingConstants { + private TaggingConstants() { + } + public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; public static final String CLASS_ID_SUBJECT = "community:subject"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index bc6b75fba..54c2dc9aa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -31,7 +31,6 @@ public class ZenodoCommunity implements Serializable { } private void setSelCriteria(String json) { - // Type collectionType = new TypeToken>(){}.getType(); selCriteria = new Gson().fromJson(json, SelectionConstraints.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index ec9fb716d..9129e6e54 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.bulktag.criteria; -public interface Selection { +import java.io.Serializable; + +public interface Selection extends Serializable { boolean 
apply(String value); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 54176efb6..459ac1ba9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -7,16 +7,16 @@ import java.util.Map; import java.util.stream.Collectors; import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; public class VerbResolver implements Serializable { - private Map> map = null; // = new HashMap<>(); - private final ClassGraph classgraph = new ClassGraph(); + + private Map> map = null; public VerbResolver() { + final ClassGraph classgraph = new ClassGraph(); try (ScanResult scanResult = // Assign scanResult in try-with-resources classgraph // Create a new ClassGraph instance .verbose() // If you want to enable logging to stderr @@ -41,8 +41,6 @@ public class VerbResolver implements Serializable { .get(0) .getValue(), value -> (Class) value.loadClass())); - } catch (Exception e) { - e.printStackTrace(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 0bb801999..446ad5fbc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { + private VerbResolverFactory() { + } + public static VerbResolver newInstance() { return new VerbResolver(); diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 04a659a1c..ddc7f93f7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -6,11 +6,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,11 +17,11 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -77,16 +76,16 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = " d.id = '" + whitelist.get(0) + "'"; - for (int i = 1; i < whitelist.size(); i++) { - whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; - } - String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + final String whitelisted = whitelist + 
.stream() + .map(id -> " d.id = '" + id + "'") + .collect(Collectors.joining(" OR ")); - for (int i = 1; i < allowedtypes.size(); i++) { - allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; - } + final String allowed = allowedtypes + .stream() + .map(type -> " d.datasourcetype.classid = '" + type + "'") + .collect(Collectors.joining(" OR ")); Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8d0d6c48b..77f7288f6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -85,13 +85,12 @@ public class PrepareResultCountrySet { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); - // log.info("number of results: {}", result.count()); + createCfHbforResult(spark); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); datasource_country.createOrReplaceTempView("datasource_country"); - // log.info("datasource_country number : {}", datasource_country.count()); spark .sql(RESULT_COUNTRYSET_QUERY) @@ -102,7 +101,7 @@ public class PrepareResultCountrySet { ArrayList countryList = a.getCountrySet(); Set countryCodes = countryList .stream() - .map(country -> country.getClassid()) + .map(CountrySbs::getClassid) .collect(Collectors.toSet()); b .getCountrySet() @@ -119,10 +118,6 @@ public class PrepareResultCountrySet { }) .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// 
.option("compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 97e0a33e1..4aa48583f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +19,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -28,8 +29,6 @@ public class SparkCountryPropagationJob { private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -90,7 +89,6 @@ public class SparkCountryPropagationJob { boolean saveGraph) { if (saveGraph) { - // updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath); log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); @@ -122,7 +120,7 @@ public class SparkCountryPropagationJob 
{ private static List merge(List c1, List c2) { HashSet countries = c1 .stream() - .map(c -> c.getClassid()) + .map(Qualifier::getClassid) .collect(Collectors.toCollection(HashSet::new)); return c2 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index b3ef3a112..95b870292 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -8,9 +8,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,7 +16,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 2cea32e58..c60012a74 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -87,7 +87,7 @@ public class PrepareResultOrcidAssociationStep2 { }); return a; }) - 
.map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 436a53cbe..40faef7f3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -18,7 +18,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -141,34 +140,31 @@ public class SparkOrcidToResultFromSemRelJob { author_surname = author.getSurname(); } if (StringUtils.isNotEmpty(author_surname)) { + // have the same surname. Check the name if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); + .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. 
only the initials of the name + // in one of the two + else { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author_name.trim())) { + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 27ff727fd..ac61e26f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -2,30 +2,25 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.getConstraintList; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { - private static final Logger log = 
LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index c57abb451..1ec521af1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -5,28 +5,27 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; public class SparkResultToProjectThroughSemRelJob { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(SparkResultToProjectThroughSemRelJob.class); public static void main(String[] args) 
throws Exception { @@ -95,26 +94,21 @@ public class SparkResultToProjectThroughSemRelJob { private static FlatMapFunction, Relation> mapRelationRn() { return value -> { - List new_relations = new ArrayList<>(); - ResultProjectSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - if (already_linked.isPresent()) { - already_linked - .get() - .getProjectSet() - .stream() - .forEach( - (p -> { - potential_update.getProjectSet().remove(p); - })); - } - String resId = potential_update.getResultId(); - potential_update + List newRelations = new ArrayList<>(); + ResultProjectSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + alreadyLinked + .ifPresent( + resultProjectSet -> resultProjectSet + .getProjectSet() + .forEach( + (p -> potentialUpdate.getProjectSet().remove(p)))); + String resId = potentialUpdate.getResultId(); + potentialUpdate .getProjectSet() - .stream() .forEach( projectId -> { - new_relations + newRelations .add( getRelation( resId, @@ -125,7 +119,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - new_relations + newRelations .add( getRelation( projectId, @@ -137,7 +131,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index a5f84cd2f..1a008797d 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -10,11 +10,12 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -109,10 +110,6 @@ public class PrepareResultCommunitySet { }) .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .json(outputPath); } private static MapFunction mapResultCommunityFn( @@ -131,7 +128,7 @@ public class PrepareResultCommunitySet { communitySet.addAll(organizationMap.get(oId)); } } - if (communitySet.size() > 0) { + if (!communitySet.isEmpty()) { ResultCommunityList rcl = new ResultCommunityList(); rcl.setResultId(rId); ArrayList communityList = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 7201a30f6..cb80a90ca 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -4,7 +4,10 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +20,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -59,6 +61,7 @@ public class SparkResultToCommunityFromOrganizationJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -106,9 +109,12 @@ public class SparkResultToCommunityFromOrganizationJob { List contextList = ret .getContext() .stream() - .map(con -> con.getId()) + .map(Context::getId) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R res = (R) ret.getClass().newInstance(); + res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 09340369d..0ddb19a1a 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -11,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +90,7 @@ public class PrepareResultCommunitySetStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4cb241ef2..3690351fb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -62,6 +62,7 @@ public class SparkResultToCommunityThroughSemRelJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); runWithSparkHiveSession( @@ -105,15 +106,15 @@ public class SparkResultToCommunityThroughSemRelJob { R ret = value._1(); Optional rcl = Optional.ofNullable(value._2()); if (rcl.isPresent()) { - Set context_set = new HashSet<>(); - ret.getContext().stream().forEach(c -> context_set.add(c.getId())); + Set contexts = new HashSet<>(); + ret.getContext().forEach(c -> contexts.add(c.getId())); List contextList = 
rcl .get() .getCommunityList() .stream() .map( c -> { - if (!context_set.contains(c)) { + if (!contexts.contains(c)) { Context newContext = new Context(); newContext.setId(c); newContext @@ -130,7 +131,10 @@ public class SparkResultToCommunityThroughSemRelJob { }) .filter(Objects::nonNull) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R r = (R) ret.getClass().newInstance(); + r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 3cf36e572..0ef5ca181 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; @@ -91,13 +92,11 @@ public class PrepareResultInstRepoAssociation { private static void prepareDatasourceOrganization( SparkSession spark, String datasourceOrganizationPath, List blacklist) { - String blacklisted = ""; - if (blacklist.size() > 0) { - blacklisted = " AND id != '" + blacklist.get(0) + "'"; - for (int i = 1; i < blacklist.size(); i++) { - blacklisted += " AND id != '" + blacklist.get(i) + "'"; - } - } + + final String blacklisted = blacklist + .stream() + .map(s -> " AND id != '" + s + "'") + .collect(Collectors.joining()); String query = "SELECT source datasourceId, target organizationId " + "FROM ( SELECT id " diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 01d7b85e4..63824f1a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -4,23 +4,24 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToOrganizationFromIstRepoJob { @@ -84,7 +85,6 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -105,9 +105,9 @@ public class 
SparkResultToOrganizationFromIstRepoJob { String outputPath, Class clazz) { - Dataset ds_org = readPath(spark, datasourceorganization, DatasourceOrganization.class); + Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, ds_org); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); @@ -125,26 +125,20 @@ public class SparkResultToOrganizationFromIstRepoJob { private static FlatMapFunction, Relation> createRelationFn() { return value -> { - List new_relations = new ArrayList<>(); - ResultOrganizationSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - List organization_list = potential_update.getOrganizationSet(); - if (already_linked.isPresent()) { - already_linked - .get() - .getOrganizationSet() - .stream() - .forEach( - rId -> { - organization_list.remove(rId); - }); - } - String resultId = potential_update.getResultId(); - organization_list - .stream() + List newRelations = new ArrayList<>(); + ResultOrganizationSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getOrganizationSet(); + alreadyLinked + .ifPresent( + resOrg -> resOrg + .getOrganizationSet() + .forEach(organizations::remove)); + String resultId = potentialUpdate.getResultId(); + organizations .forEach( orgId -> { - new_relations + newRelations .add( getRelation( orgId, @@ -155,7 +149,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - new_relations + newRelations .add( getRelation( resultId, @@ -167,7 +161,7 @@ public class SparkResultToOrganizationFromIstRepoJob { 
PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } @@ -175,13 +169,13 @@ public class SparkResultToOrganizationFromIstRepoJob { SparkSession spark, String inputPath, Class resultClazz, - Dataset ds_org) { + Dataset dsOrg) { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); createCfHbforResult(spark); - ds_org.createOrReplaceTempView("rels"); + dsOrg.createOrReplaceTempView("rels"); return spark .sql(RESULT_ORGANIZATIONSET_QUERY) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 72e0a63fa..07299f01a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -90,7 +90,7 @@ public class BulkTagJobTest { } @Test - public void noUpdatesTest() throws Exception { + void noUpdatesTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -128,7 +128,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectNoPreviousContextTest() throws Exception { + void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getPath(); @@ -224,7 +224,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") @@ -306,7 +306,7 @@ public class BulkTagJobTest { } @Test - public 
void bulktagByDatasourceTest() throws Exception { + void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getPath(); @@ -378,7 +378,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByZenodoCommunityTest() throws Exception { + void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") @@ -500,7 +500,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceTest() throws Exception { + void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getPath(); @@ -628,7 +628,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { SparkBulkTagJob .main( @@ -724,7 +724,7 @@ public class BulkTagJobTest { } @Test - public void bulktagDatasourcewithConstraintsTest() throws Exception { + void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() .getResource( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index ca737b79f..861546adb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import 
org.xml.sax.SAXException; import com.google.gson.Gson; @@ -20,12 +21,12 @@ import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ -public class CommunityConfigurationFactoryTest { +class CommunityConfigurationFactoryTest { private final VerbResolver resolver = new VerbResolver(); @Test - public void parseTest() throws DocumentException, IOException { + void parseTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() @@ -39,7 +40,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void applyVerb() + void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { Constraint sc = new Constraint(); @@ -52,7 +53,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void loadSelCriteriaTest() throws DocumentException, IOException { + void loadSelCriteriaTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 88ad43b6b..963ee5529 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import 
eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -67,7 +67,7 @@ public class CountryPropagationJobTest { } @Test - public void testCountryPropagationSoftware() throws Exception { + void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") .getPath(); @@ -105,7 +105,7 @@ public class CountryPropagationJobTest { Dataset countryExploded = verificationDs .flatMap( (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); + .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -119,10 +119,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassid = verificationDs .flatMap((FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -180,10 +179,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -241,10 +239,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( diff --git 
a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 238375197..85db7ecf9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -65,7 +65,7 @@ public class OrcidPropagationJobTest { } @Test - public void noUpdateTest() throws Exception { + void noUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") .getPath(); @@ -111,7 +111,7 @@ public class OrcidPropagationJobTest { } @Test - public void oneUpdateTest() throws Exception { + void oneUpdateTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { @@ -178,7 +178,7 @@ public class OrcidPropagationJobTest { } @Test - public void twoUpdatesTest() throws Exception { + void twoUpdatesTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index abed028e1..2fe1bc574 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -69,7 +69,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() .getResource( @@ -106,7 +106,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ 
@Test - public void UpdateTenTest() throws Exception { + void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -178,7 +178,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index d739516fc..4dd8b976c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityFromOrganizationJob() throws Exception { + void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") .getPath(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 7709e00a8..0d5b12c80 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ 
public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityThroughSemRelJob() throws Exception { + void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index cfcccc5f0..fdcb10fb9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -67,7 +67,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -110,7 +110,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void UpdateNoMixTest() throws Exception { + void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -176,7 +176,7 @@ public class ResultToOrganizationJobTest { } @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index f06059dd5..95aa749b2 100644 
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; -public class CleaningRuleMap extends HashMap> implements Serializable { +public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { /** * Creates the mapping for the Oaf types subject to cleaning diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 9ba153ba5..5502fd391 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -16,7 +16,7 @@ public class OafCleaner implements Serializable { try { navigate(oaf, mapping); } catch (IllegalAccessException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } return oaf; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 00ddcb5a8..0d2df11fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -1,22 +1,21 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; +import java.io.IOException; +import java.io.Serializable; import java.util.Optional; -import org.apache.commons.compress.archivers.ar.ArArchiveEntry; -import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; -import 
org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.MakeTarArchive; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class MakeTar implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accba..dc740e811 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -8,6 +8,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -29,7 +30,7 @@ public class QueryInformationSystem { ""; public CommunityMap getCommunityMap() - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { return getMap(isLookUp.quickSearchProfile(XQUERY)); } @@ -42,12 +43,14 @@ public class QueryInformationSystem { this.isLookUp = isLookUpService; } - private CommunityMap getMap(List communityMap) throws DocumentException { + private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { final CommunityMap map = new 
CommunityMap(); for (String xml : communityMap) { final Document doc; - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c..500fe5986 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -93,7 +93,7 @@ public class ResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -109,20 +109,20 @@ public class ResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -132,7 +132,8 @@ public class ResultMapper implements Serializable { Optional 
.ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -219,11 +220,12 @@ public class ResultMapper implements Serializable { if (oInst.isPresent()) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) - .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( - oInst.get().stream().map(i -> getCommunityInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); } } @@ -422,7 +424,7 @@ public class ResultMapper implements Serializable { Optional .ofNullable(i.getInstancetype()) .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518..f86f6918f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -31,25 +32,23 @@ public 
class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private final QueryInformationSystem queryInformationSystem; - private final Configuration conf; private final BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { - conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath); + fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } public static void main(String[] args) throws Exception { @@ -74,10 +73,9 @@ public class SaveCommunityMap implements Serializable { final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); scm.saveCommunityMap(); - } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { + private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException, SAXException { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); writer.close(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544..ba26b708a 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -17,9 +17,9 @@ import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class SendToZenodoHDFS implements Serializable { - private final static String NEW = "new"; // to be used for a brand new deposition in zenodo - private final static String VERSION = "version"; // to be used to upload a new version of a published deposition - private final static String UPDATE = "update"; // to upload content to an open deposition not published + private static final String NEW = "new"; // to be used for a brand new deposition in zenodo + private static final String VERSION = "version"; // to be used to upload a new version of a published deposition + private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); @@ -85,7 +85,6 @@ public class SendToZenodoHDFS implements Serializable { Path p = fileStatus.getPath(); String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { - // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); log.info("Sending information for community: " + name); if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { @@ -102,9 +101,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) + if (publish) { zenodoApiClient.publish(); - + } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 984e8b128..8e75e9d92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -25,6 +25,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Utils() { + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } @@ -57,7 +60,7 @@ public class Utils { public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath)))); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); try { String line; while ((line = br.readLine()) != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e95..b92eb3e60 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -48,36 +49,34 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .stream() .forEach(c -> printResult(c, result, outputPath)); } - private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result - .filter((FilterFunction) r -> containsCommunity(r, 
c)); + private static void printResult(String community, Dataset result, String outputPath) { + Dataset communityProducts = result + .filter((FilterFunction) r -> containsCommunity(r, community)); try { - community_products.first(); - community_products + communityProducts.first(); + communityProducts .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - } catch (Exception e) { - + .json(outputPath + "/" + community); + } catch (NoSuchElementException e) { + // ignoring it on purpose } - } - private static boolean containsCommunity(CommunityResult r, String c) { + private static boolean containsCommunity(CommunityResult r, String community) { if (Optional.ofNullable(r.getContext()).isPresent()) { - return r + return !r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) + .filter(con -> con.getCode().equals(community)) .collect(Collectors.toList()) - .size() > 0; + .isEmpty(); } return false; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index 63970d14b..7ab8a7540 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -2,9 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; -import java.util.*; - -import javax.swing.text.html.Option; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import 
eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b4..0f2b4c2ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -22,6 +22,7 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -127,11 +129,11 @@ public class SparkPrepareResultProject implements Serializable { op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) - .map(a -> a.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getTitle()) - .map(v -> v.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getFundingtree()) @@ -140,7 +142,7 @@ public class SparkPrepareResultProject implements Serializable { .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); - if (tmp.size() > 0) { + if (!tmp.isEmpty()) { return tmp.get(0); } else { return null; @@ -161,30 +163,23 @@ public class SparkPrepareResultProject implements Serializable { } private static Funder getFunder(String fundingtree) { - // 
["nsf_________::NSFNSFNational Science - // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision - // of Computing and Communication FoundationsDivision of Computing and Communication - // Foundationsnsf_________::NSF::CISE/OADDirectorate for - // Computer & Information Science & EngineeringDirectorate for Computer & - // Information Science & - // Engineeringnsf:fundingStream"] - Funder f = new Funder(); + final Funder f = new Funder(); final Document doc; try { - doc = new SAXReader().read(new StringReader(fundingtree)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(fundingtree)); f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { List node = ((Node) o).selectNodes("./name"); f.setFundingStream(((Node) node.get(0)).getText()); - } return f; - } catch (DocumentException e) { - e.printStackTrace(); + } catch (DocumentException | SAXException e) { + throw new IllegalArgumentException(e); } - return f; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 2b80b1d86..39ae32053 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -60,7 +60,7 @@ public class SparkUpdateProjectInfo implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, 
preparedInfoPath);// , inputClazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } @@ -77,9 +77,7 @@ public class SparkUpdateProjectInfo implements Serializable { "left") .map((MapFunction, CommunityResult>) value -> { CommunityResult r = value._1(); - Optional.ofNullable(value._2()).ifPresent(rp -> { - r.setProjects(rp.getProjectsList()); - }); + Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; }, Encoders.bean(CommunityResult.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java index eb546624e..57708a78d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java @@ -23,5 +23,4 @@ public class Constants implements Serializable { public static final String CONTEXT_NS_PREFIX = "context_____"; public static final String UNKNOWN = "UNKNOWN"; - // public static final String FUNDER_DS = "entityregistry::projects"; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index ccb84c713..120de9327 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import 
java.util.function.Consumer; import java.util.function.Function; -import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,15 +16,13 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and @@ -33,8 +33,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; public class CreateContextEntities implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; + private final transient Configuration conf; + private final transient BufferedWriter writer; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -88,7 +88,7 @@ public class CreateContextEntities implements Serializable { } public void execute(final Function producer, String isLookUpUrl) - throws Exception { + throws ISLookUpException { QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); @@ -101,10 +101,9 @@ public class CreateContextEntities implements Serializable { protected void writeEntity(final R r) { try { 
writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - // log.info("writing context : {}", new Gson().toJson(r)); writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315..10f2014d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -7,6 +7,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; @@ -31,20 +32,21 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; * and the project is not created because of a low coverage in the profiles of openaire ids related to projects */ public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; - private final QueryInformationSystem queryInformationSystem; + private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); + private final transient Configuration conf; + private final transient BufferedWriter writer; + private final transient QueryInformationSystem queryInformationSystem; private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { 
String jsonConfiguration = IOUtils .toString( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + Objects + .requireNonNull( + CreateContextRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,10 +72,6 @@ public class CreateContextRelation implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); @@ -107,7 +105,7 @@ public class CreateContextRelation implements Serializable { public void execute(final Function> producer, String category, String prefix) { - final Consumer consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c)); + final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); queryInformationSystem.getContextRelation(consumer, category, prefix); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66..6b9f13277 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -6,8 +6,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -21,8 +19,8 @@ import 
eu.dnetlib.dhp.schema.dump.oaf.graph.*; * context entity and datasource/projects related to the context. */ public class Process implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Process.class); + @SuppressWarnings("unchecked") public static R getEntity(ContextInfo ci) { try { ResearchInitiative ri; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a5..0ed5de67c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -11,6 +11,7 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -84,8 +85,9 @@ public class QueryInformationSystem { final Document doc; try { - - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); @@ -102,7 +104,7 @@ public class QueryInformationSystem { } consumer.accept(cinfo); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { e.printStackTrace(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 868fa89fe..4365e861f 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -61,7 +61,7 @@ public class SparkOrganizationRelation implements Serializable { log.info("organization map : {}", new Gson().toJson(organizationMap)); final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {} ", communityMapPath); + log.info("communityMapPath: {}", communityMapPath); SparkConf conf = new SparkConf(); @@ -117,15 +117,12 @@ public class SparkOrganizationRelation implements Serializable { } })); - // if (relList.size() > 0) { spark .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - // } - } @NotNull diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14..d8a1b5c21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -14,16 +16,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.zenodo.Community; -import 
eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f4..2d2b04b5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -62,6 +61,7 @@ public class SparkResultLinkedToProject implements Serializable { final String relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); + @SuppressWarnings("unchecked") Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -95,9 +95,9 @@ public class SparkResultLinkedToProject implements Serializable { ._2() .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + .mapGroups( + (MapGroupsFunction, R>) 
(k, it) -> it.next()._2(), + Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java index 59bdb3914..f87c0eb7a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java @@ -74,24 +74,4 @@ public class DatasourceCompatibilityComparator implements Comparator return lClass.compareTo(rClass); } - /* - * CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY - * ['openaire-cris_1.1']) THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT - * COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - * 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - * 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - * 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - * 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - * 
'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - * 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) - * THEN 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END - */ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index 602213e58..ef419a042 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import java.util.stream.Collectors; -import javax.xml.crypto.Data; - import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -16,7 +14,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import 
eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -55,9 +51,11 @@ public class MergeGraphTableSparkJob { String jsonConfiguration = IOUtils .toString( - CleanGraphSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json")); + Objects + .requireNonNull( + MergeGraphTableSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -133,7 +131,7 @@ public class MergeGraphTableSparkJob { HashSet collectedFromNames = Optional .ofNullable(o.getCollectedfrom()) .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet()); + .orElse(new HashSet<>()); return !collectedFromNames.contains("Datacite"); }) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 03c3eeb3c..9aa4e4c31 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -32,8 +32,6 @@ import org.dom4j.Document; import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -88,8 +86,6 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Map nsContext = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class); - static { nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", 
"http://www.driver-repository.eu/namespace/dri"); @@ -117,9 +113,6 @@ public abstract class AbstractMdRecordToOafMapper { } public List processMdRecord(final String xml) { - - // log.info("Processing record: " + xml); - try { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); @@ -134,7 +127,7 @@ public abstract class AbstractMdRecordToOafMapper { doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { - return null; + return Lists.newArrayList(); } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) @@ -142,7 +135,7 @@ public abstract class AbstractMdRecordToOafMapper { : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { - return null; + return Lists.newArrayList(); } final DataInfo info = prepareDataInfo(doc, invisible); @@ -161,21 +154,17 @@ public abstract class AbstractMdRecordToOafMapper { protected String getResultType(final Document doc, final List instances) { final String type = doc.valueOf("//dr:CobjCategory/@type"); - if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() - .map(s -> UNKNOWN.equalsIgnoreCase(s) ? 
"0000" : s) + .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) .orElse("0000"); // Unknown return Optional .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(q -> q.getClassid()) + .map(Qualifier::getClassid) .orElse("0000"); - /* - * .orElseThrow( () -> new IllegalArgumentException( String.format("'%s' not mapped in %s", instanceType, - * DNET_RESULT_TYPOLOGIES))); - */ } return type; @@ -185,7 +174,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } @@ -498,7 +487,6 @@ public abstract class AbstractMdRecordToOafMapper { accessRight.setSchemename(qualifier.getSchemename()); // TODO set the OAStatus - // accessRight.setOaStatus(...); return accessRight; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index bbfb7429f..9027b49d7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -127,7 +127,7 @@ public class GenerateEntitiesApplication { .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .flatMap(List::iterator)); } switch (mode) { @@ -135,7 +135,7 @@ public class GenerateEntitiesApplication { save( inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) - .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2)) + .reduceByKey(OafMapperUtils::merge) .map(Tuple2::_2), 
targetPath); break; @@ -191,7 +191,7 @@ public class GenerateEntitiesApplication { case "otherresearchproduct": return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + throw new IllegalArgumentException("type not managed: " + type.toLowerCase()); } } @@ -199,9 +199,9 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: " + clazz); + log.error("Error parsing object of class: {}", clazz); log.error(s); - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index d5c310c1b..ee1b6a5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -40,9 +40,11 @@ public class MergeClaimsApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + Objects + .requireNonNull( + MergeClaimsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..b9033702d 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -9,10 +9,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -72,8 +69,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateDbEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); + Objects + .requireNonNull( + MigrateDbEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")))); parser.parseArgument(args); @@ -87,7 +86,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("postgresPassword: xxx"); final String dbSchema = parser.get("dbschema"); - log.info("dbSchema {}: " + dbSchema); + log.info("dbSchema {}: ", dbSchema); final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); @@ -659,18 +658,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); - // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); 
-// return Arrays.asList(r1, r2); - return Arrays.asList(r1); } catch (final Exception e) { throw new RuntimeException(e); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 1d4eca2c2..4110bd806 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -82,11 +83,11 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication public static void processPaths(final SparkSession spark, final String outputPath, final Set paths, - final String type) throws Exception { + final String type) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - log.info("Found " + paths.size() + " not empty mdstores"); + log.info("Found {} not empty mdstores", paths.size()); paths.forEach(log::info); final String[] validPaths = paths @@ -98,7 +99,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication spark .read() .parquet(validPaths) - .map((MapFunction) r -> enrichRecord(r), Encoders.STRING()) + .map((MapFunction) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .toJavaRDD() .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) // .coalesce(1) @@ -120,7 +121,9 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication final String tranDate = dateFormat.format(new Date((Long) r.getAs("dateOfTransformation"))); try { - 
final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); @@ -135,8 +138,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication private static Set mdstorePaths(final String mdstoreManagerUrl, final String format, final String layout, - final String interpretation) - throws Exception { + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 3f6afbeac..6dbab96cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -51,10 +51,10 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio public void execute(final String format, final String layout, final String interpretation) { final Map colls = mdstoreClient.validCollections(format, layout, interpretation); - log.info("Found " + colls.size() + " mdstores"); + log.info("Found {} mdstores", colls.size()); for (final Entry entry : colls.entrySet()) { - log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")"); + log.info("Processing mdstore {} (collection: {})", 
entry.getKey(), entry.getValue()); final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index d753cddeb..2b49a9dc1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -19,7 +19,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -56,8 +55,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); author.setPid(new ArrayList<>()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7925a7826..02aab4f16 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -13,7 +13,6 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -88,11 +87,11 
@@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); if (type.toLowerCase().startsWith(ORCID)) { - final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { res.add(structuredProperty(id, MAG_PID_TYPE, info)); @@ -388,7 +387,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 5523863ff..edfd65299 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -4,12 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index a0ce4f5a6..5d32fe926 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; @@ -34,7 +35,7 @@ public class AbstractMigrationApplication implements Closeable { this.writer = null; } - public AbstractMigrationApplication(final String hdfsPath) throws Exception { + public AbstractMigrationApplication(final String hdfsPath) throws IOException { log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath)); @@ -46,15 +47,14 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } - private Configuration getConf() throws IOException { - final Configuration conf = new Configuration(); + private Configuration getConf() { + return new Configuration(); /* * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", * org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", * org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser); * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf); */ - return conf; } protected void emit(final String s, final String type) { @@ -62,16 +62,16 @@ public class AbstractMigrationApplication implements Closeable { key.set(counter.getAndIncrement() + ":" + 
type); value.set(s); writer.append(key, value); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalStateException(e); } } protected void emitOaf(final Oaf oaf) { try { emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase()); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (JsonProcessingException e) { + throw new IllegalStateException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index 32f6e7abc..afaac04ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -68,7 +68,7 @@ public class GraphHiveImporterJobTest { } @Test - public void testImportGraphAsHiveDB() throws Exception { + void testImportGraphAsHiveDB() throws Exception { GraphHiveImporterJob .main( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b196d1948..edcd72ab4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -50,7 +50,7 @@ public class GraphCleaningFunctionsTest { } @Test - public void testCleaning() throws Exception { + void testCleaning() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -166,10 +166,6 @@ public class GraphCleaningFunctionsTest { // TODO add more assertions to verity the cleaned values 
System.out.println(MAPPER.writeValueAsString(p_cleaned)); - - /* - * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); - */ } private Stream getAuthorPidTypes(Result pub) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 803ae0416..697ec705f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -10,10 +10,10 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; @Disabled -public class GenerateJsonSchema { +class GenerateJsonSchema { @Test - public void generateSchema() { + void generateSchema() { SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(Option.SCHEMA_VERSION_INDICATOR) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java index 51e4e1033..41b906e58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java @@ -22,7 +22,7 @@ public class MakeTarTest { } @Test - public void testTar() throws IOException { + void testTar() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8dd..03eed7a45 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -15,7 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -37,8 +37,6 @@ public class PrepareResultProjectJobTest { private static final Logger log = LoggerFactory .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); - private static final HashMap map = new HashMap<>(); - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -69,7 +67,7 @@ public class PrepareResultProjectJobTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match") @@ -90,12 +88,12 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertEquals(0, verificationDataset.count()); + assertEquals(0, verificationDataset.count()); } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one") @@ -116,12 +114,11 @@ public class PrepareResultProjectJobTest { 
org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 1); + assertEquals(1, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); verificationDataset.createOrReplaceTempView("table"); @@ -131,14 +128,14 @@ public class PrepareResultProjectJobTest { "from table " + "lateral view explode (projectsList) pl as projList"); - Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); verificationDataset.show(false); } @Test - public void testMatch() throws Exception { + void testMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match") @@ -159,16 +156,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -177,62 
+172,54 @@ public class PrepareResultProjectJobTest { + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + 
.count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a..98902b618 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -14,12 +14,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in 
collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -66,7 +67,7 @@ public class QueryInformationSystemTest { private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException { + public void setUp() throws ISLookUpException, DocumentException, SAXException { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); @@ -74,13 +75,13 @@ public class QueryInformationSystemTest { } @Test - public void testSize() throws ISLookUpException { + void testSize() throws ISLookUpException { Assertions.assertEquals(23, map.size()); } @Test - public void testContent() { + void testContent() { Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a..e6f0e9106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - public void test1() { + void testCommunitySplit() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee0..a164593ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -68,7 +68,7 @@ public class UpdateProjectInfoTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 05dc423cb..8d06758a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -32,7 +32,7 @@ public class ZenodoUploadTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { CommunityMap communityMap = new CommunityMap(); communityMap.put("ni", "Neuroinformatics"); communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); @@ -86,7 +86,7 @@ public class ZenodoUploadTest { } @Test - public void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -137,7 +137,7 @@ public class ZenodoUploadTest { } @Test - public void readCommunityMap() throws IOException { + void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); System.out .println( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd1894..f881e6b30 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -86,7 +86,7 @@ public class CreateEntityTest { } @Test - public void test1() throws ISLookUpException, IOException { + void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); @@ -144,7 +144,7 @@ public class CreateEntityTest { @Test @Disabled - public void test2() throws IOException, ISLookUpException { + void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d6..69e550d45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.utils.DHPUtils; -public class CreateRelationTest { +class CreateRelationTest { List communityContext = Arrays .asList( @@ -473,7 +473,7 @@ public class CreateRelationTest { } @Test - public void test1() { + void test1() { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 3d42f124e..e43383ef4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -60,7 +60,7 @@ public class ExtractRelationFromEntityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java index 75d5a2673..eb5919fd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java @@ -4,13 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder; -public class FunderParsingTest { +class FunderParsingTest { @Test - public void testFunderTwoLevels() throws DocumentException { + void testFunderTwoLevels() throws DocumentException { String funding_Stream = "nsf_________::NSFNSFNational Science " + @@ -37,7 +38,7 @@ public class FunderParsingTest { } @Test - public void testFunderThreeeLevels() throws DocumentException { + void testFunderThreeeLevels() throws DocumentException, SAXException { String funding_stream = "ec__________::EC" + "EC" + "European Commission" + diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138..049959704 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -17,7 +17,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -513,7 +513,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeEntity() throws ISLookUpException { + void testSizeEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -523,7 +523,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeRelation() throws ISLookUpException { + void testSizeRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -534,7 +534,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentRelation() throws ISLookUpException { + void testContentRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -572,7 +572,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentEntity() throws ISLookUpException { + void testContentEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index ea2dc73ca..50a9f26b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -69,7 +69,7 @@ public class RelationFromOrganizationTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab3..d1e3b3acc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -69,7 +69,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") @@ -102,7 +102,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d942..8ac0c552f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -65,7 +65,7 @@ public class SplitPerFunderTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java index 0089811cf..2d28ee305 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; -public class MergeGraphTableSparkJobTest { +class MergeGraphTableSparkJobTest { private ObjectMapper mapper; @@ -25,7 +25,7 @@ public class MergeGraphTableSparkJobTest { } @Test - public void testMergeDatasources() throws IOException { + void testMergeDatasources() throws IOException { assertEquals( "openaire-cris_1.1", MergeGraphTableSparkJob diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index e0d202209..1e974dd69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -24,7 +24,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class GenerateEntitiesApplicationTest { +class GenerateEntitiesApplicationTest { @Mock private ISLookUpService isLookUpService; @@ -44,7 +44,7 @@ public class GenerateEntitiesApplicationTest { } @Test - public void testMergeResult() throws IOException { + void testMergeResult() throws IOException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 87738a0e8..000dbfe25 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions.cleanup; @@ -7,6 +8,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -20,7 +22,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -28,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; 
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class MappersTest { +class MappersTest { @Mock private ISLookUpService isLookUpService; @@ -40,8 +41,8 @@ public class MappersTest { public void setUp() throws Exception { lenient().when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARIES_XQUERY)).thenReturn(vocs()); lenient() - .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) - .thenReturn(synonyms()); + .when(isLookUpService.quickSearchProfile(VocabularyGroup.VOCABULARY_SYNONYMS_XQUERY)) + .thenReturn(synonyms()); vocs = VocabularyGroup.loadVocsFromIS(isLookUpService); } @@ -49,7 +50,7 @@ public class MappersTest { @Test void testPublication() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -70,24 +71,24 @@ public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); - assertTrue(p.getSource().size() == 1); + assertEquals(1, p.getSource().size()); assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); assertTrue(p.getAuthor().size() > 0); final Optional author = p - .getAuthor() - .stream() - .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) - .findFirst(); + .getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); assertTrue(author.isPresent()); final StructuredProperty pid = author - .get() - .getPid() - .stream() - .findFirst() - .get(); + .get() + .getPid() + .stream() + .findFirst() + .orElseThrow(() -> new IllegalStateException("missing author pid")); 
assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -106,12 +107,11 @@ public class MappersTest { assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p - .getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", i.getAccessright().getClassid()); - }); + .getInstance() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); assertEquals("0001", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); assertTrue(p.getInstance().get(0).getPid().isEmpty()); @@ -140,17 +140,15 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); - // System.out.println(new ObjectMapper().writeValueAsString(p)); - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testPublication_PubMed() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record_pubmed.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -172,18 +170,18 @@ public class MappersTest { assertTrue(p.getAuthor().size() > 0); final Optional author = p - .getAuthor() - .stream() - .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) - .findFirst(); + .getAuthor() + .stream() + .filter(a -> a.getPid() 
!= null && !a.getPid().isEmpty()) + .findFirst(); assertTrue(author.isPresent()); final StructuredProperty pid = author - .get() - .getPid() - .stream() - .findFirst() - .get(); + .get() + .getPid() + .stream() + .findFirst() + .get(); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -195,23 +193,22 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(p.getPid().size() > 0); - assertEquals(p.getPid().get(0).getValue(), "PMC1517292"); - assertEquals(p.getPid().get(0).getQualifier().getClassid(), "pmc"); + assertEquals("PMC1517292", p.getPid().get(0).getValue()); + assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p - .getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", i.getAccessright().getClassid()); - }); + .getInstance() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); - assertTrue(p.getInstance().get(0).getPid().size() == 2); + assertEquals(2, p.getInstance().get(0).getPid().size()); - assertTrue(p.getInstance().get(0).getAlternateIdentifier().size() == 1); + assertEquals(1, p.getInstance().get(0).getAlternateIdentifier().size()); assertEquals("doi", p.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("10.3897/oneeco.2.e13718", p.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); @@ -222,7 +219,7 @@ public class MappersTest { @Test void testPublicationInvisible() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final 
String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); @@ -237,7 +234,7 @@ public class MappersTest { @Test void testDataset() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -275,17 +272,18 @@ public class MappersTest { assertTrue(d.getAuthor().size() > 0); final Optional author = d - .getAuthor() - .stream() - .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) - .findFirst(); + .getAuthor() + .stream() + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) + .findFirst(); assertTrue(author.isPresent()); - final StructuredProperty pid = author - .get() - .getPid() - .stream() - .findFirst() - .get(); + final Optional oPid = author + .get() + .getPid() + .stream() + .findFirst(); + assertTrue(oPid.isPresent()); + final StructuredProperty pid = oPid.get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -297,10 +295,10 @@ public class MappersTest { assertEquals(1, author.get().getAffiliation().size()); final Optional> opAff = author - .get() - .getAffiliation() - .stream() - .findFirst(); + .get() + .getAffiliation() + .stream() + .findFirst(); assertTrue(opAff.isPresent()); final Field affiliation = opAff.get(); assertEquals("ISTI-CNR", affiliation.getValue()); @@ -313,12 +311,11 @@ public class MappersTest { assertNotNull(d.getInstance()); assertTrue(d.getInstance().size() > 0); d - .getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", 
i.getAccessright().getClassid()); - }); + .getInstance() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", i.getAccessright().getClassid()); + }); assertEquals("0001", d.getInstance().get(0).getRefereed().getClassid()); assertNotNull(d.getInstance().get(0).getPid()); assertFalse(d.getInstance().get(0).getPid().isEmpty()); @@ -344,13 +341,14 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testOdfBielefeld() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_bielefeld.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -364,13 +362,14 @@ public class MappersTest { assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); final Optional author = p - .getAuthor() - .stream() - .findFirst(); + .getAuthor() + .stream() + .findFirst(); assertTrue(author.isPresent()); assertEquals("Potwarka, Luke R.", author.get().getFullname()); @@ -386,18 +385,18 @@ public class MappersTest { assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p - .getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("OPEN", i.getAccessright().getClassid()); - }); + .getInstance() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("OPEN", 
i.getAccessright().getClassid()); + }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } @Test void testOpentrial() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -432,17 +431,17 @@ public class MappersTest { assertNotNull(d.getTitle()); assertEquals(1, d.getTitle().size()); assertEquals( - "Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia", - d - .getTitle() - .get(0) - .getValue()); + "Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia", + d + .getTitle() + .get(0) + .getValue()); assertNotNull(d.getDescription()); assertEquals(1, d.getDescription().size()); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); - assertTrue(d.getAuthor().size() == 1); + assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Kristian K.", d.getAuthor().get(0).getName()); assertEquals("Jensen", d.getAuthor().get(0).getSurname()); @@ -460,7 +459,7 @@ public class MappersTest { assertTrue(d.getContext().isEmpty()); assertNotNull(d.getInstance()); - assertTrue(d.getInstance().size() == 1); + assertEquals(1, d.getInstance().size()); final Instance i = d.getInstance().get(0); @@ -513,7 +512,7 @@ public class MappersTest { @Test void testSoftware() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -530,22 +529,15 @@ public class 
MappersTest { assertTrue(s.getInstance().size() > 0); } - // @Test - void testDataset_2() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset_2.xml")); - - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - } - @Test void testClaimDedup() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(list); + assertFalse(list.isEmpty()); + System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); @@ -553,7 +545,7 @@ public class MappersTest { @Test void testNakala() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -581,7 +573,7 @@ public class MappersTest { @Test void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -606,7 +598,8 @@ public class MappersTest { @Test void testClaimFromCrossref() throws IOException { - final String xml = 
IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -622,7 +615,7 @@ public class MappersTest { @Test void testODFRecord() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); @@ -636,7 +629,7 @@ public class MappersTest { @Test void testTextGrid() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -650,9 +643,9 @@ public class MappersTest { assertEquals(1, p.getAuthor().size()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertTrue(p.getPid().size() == 1); + assertEquals(1, p.getPid().size()); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); - assertTrue(PidType.handle.equals(PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()))); + assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("dataset", p.getResulttype().getClassname()); assertEquals(1, p.getInstance().size()); @@ -660,7 +653,7 @@ public class MappersTest { assertValidId(p.getInstance().get(0).getCollectedfrom().getKey()); 
assertValidId(p.getInstance().get(0).getHostedby().getKey()); assertEquals( - "http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue()); + "http://creativecommons.org/licenses/by/3.0/de/legalcode", p.getInstance().get(0).getLicense().getValue()); assertEquals(1, p.getInstance().size()); assertNotNull(p.getInstance().get(0).getAlternateIdentifier()); @@ -670,7 +663,7 @@ public class MappersTest { @Test void testBologna() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -687,7 +680,7 @@ public class MappersTest { @Test void testJairo() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -700,7 +693,7 @@ public class MappersTest { assertNotNull(p.getTitle()); assertFalse(p.getTitle().isEmpty()); - assertTrue(p.getTitle().size() == 1); + assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); final Publication p_cleaned = cleanup(fixVocabularyNames(p)); @@ -711,7 +704,8 @@ public class MappersTest { @Test void testOdfFromHdfs() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -730,9 +724,9 @@ public class MappersTest { 
assertTrue(p.getAuthor().size() > 0); final Optional author = p - .getAuthor() - .stream() - .findFirst(); + .getAuthor() + .stream() + .findFirst(); assertTrue(author.isPresent()); assertEquals("Museum Sønderjylland", author.get().getFullname()); @@ -746,12 +740,11 @@ public class MappersTest { assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p - .getInstance() - .stream() - .forEach(i -> { - assertNotNull(i.getAccessright()); - assertEquals("UNKNOWN", i.getAccessright().getClassid()); - }); + .getInstance() + .forEach(i -> { + assertNotNull(i.getAccessright()); + assertEquals("UNKNOWN", i.getAccessright().getClassid()); + }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } @@ -765,14 +758,17 @@ public class MappersTest { private List vocs() throws IOException { return IOUtils - .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + .readLines( + Objects + .requireNonNull(MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"))); } private List synonyms() throws IOException { return IOUtils - .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + .readLines( + Objects + .requireNonNull( + MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..69943435a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -32,7 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import 
eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) -public class MigrateDbEntitiesApplicationTest { +class MigrateDbEntitiesApplicationTest { private MigrateDbEntitiesApplication app; @@ -58,7 +58,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasource() throws Exception { + void testProcessDatasource() throws Exception { final List fields = prepareMocks("datasources_resultset_entry.json"); final List list = app.processDatasource(rs); @@ -81,7 +81,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProject() throws Exception { + void testProcessProject() throws Exception { final List fields = prepareMocks("projects_resultset_entry.json"); final List list = app.processProject(rs); @@ -99,7 +99,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessOrganization() throws Exception { + void testProcessOrganization() throws Exception { final List fields = prepareMocks("organizations_resultset_entry.json"); final List list = app.processOrganization(rs); @@ -126,7 +126,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasourceOrganization() throws Exception { + void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); final List list = app.processDatasourceOrganization(rs); @@ -143,7 +143,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProjectOrganization() throws Exception { + void testProcessProjectOrganization() throws Exception { final List fields = prepareMocks("projectorganization_resultset_entry.json"); final List list = app.processProjectOrganization(rs); @@ -162,7 +162,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_context() throws Exception { + void testProcessClaims_context() throws Exception { final List fields = 
prepareMocks("claimscontext_resultset_entry.json"); final List list = app.processClaims(rs); @@ -177,7 +177,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_rels() throws Exception { + void testProcessClaims_rels() throws Exception { final List fields = prepareMocks("claimsrel_resultset_entry.json"); final List list = app.processClaims(rs); @@ -208,9 +208,6 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(r1.getCollectedfrom().get(0).getKey()); assertValidId(r2.getCollectedfrom().get(0).getKey()); - - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { @@ -273,7 +270,7 @@ public class MigrateDbEntitiesApplicationTest { final String[] values = ((List) tf.getValue()) .stream() .filter(Objects::nonNull) - .map(o -> o.toString()) + .map(Object::toString) .toArray(String[]::new); Mockito.when(arr.getArray()).thenReturn(values); @@ -334,6 +331,7 @@ public class MigrateDbEntitiesApplicationTest { return new Float(getValueAs(name, fields).toString()); } + @SuppressWarnings("unchecked") private T getValueAs(final String name, final List fields) { return fields .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java index fb2c90e5c..3b9616de3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java @@ -26,8 +26,6 @@ import io.fares.junit.mongodb.MongoForAllExtension; @Disabled public class MigrateMongoMdstoresApplicationTest { - private static final Logger log = 
LoggerFactory.getLogger(MigrateMongoMdstoresApplicationTest.class); - public static final String COLL_NAME = "9eed8a4d-bb41-47c3-987f-9d06aee0dec0::1453898911558"; @RegisterExtension diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index 3fd365416..c9c32edd9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -75,14 +75,14 @@ public class PatchRelationApplicationTest { } @Test - public void testPatchRelationApplication() throws Exception { + void testPatchRelationApplication() throws Exception { final String graphBasePath = workingDir.toString() + "/graphBasePath"; PatchRelationsApplication.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphBasePath", graphBasePath, - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + "-workingDir", workingDir + "/workingDir", + "-idMappingPath", workingDir + "/" + ID_MAPPING_PATH }); final List rels = spark diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java index 110fabf45..ec059ad73 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java @@ -9,7 +9,7 @@ import java.util.List; import org.junit.jupiter.api.Test; -public class ReflectionTest { +class ReflectionTest { private final Cleaner cleaner = new Cleaner(); diff --git 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..ed33ff6b6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -46,9 +46,11 @@ public class CreateRelatedEntitiesJob_phase1 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase1.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,6 +72,7 @@ public class CreateRelatedEntitiesJob_phase1 { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); @@ -146,7 +149,11 @@ public class CreateRelatedEntitiesJob_phase1 { Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { - final StructuredProperty title = result.getTitle().stream().findFirst().get(); + final StructuredProperty title = result + .getTitle() + .stream() + .findFirst() + .orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId())); title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH)); re.setTitle(title); } @@ -196,7 +203,7 @@ public class CreateRelatedEntitiesJob_phase1 { List> f = p.getFundingtree(); if (!f.isEmpty()) { - 
re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); + re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList())); } break; } @@ -211,16 +218,16 @@ public class CreateRelatedEntitiesJob_phase1 { return Optional .ofNullable(f) .filter(Objects::nonNull) - .map(x -> x.getValue()) + .map(Field::getValue) .orElse(defaultValue); } /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * file * - * @param spark - * @param relationPath + * @param spark the SparkSession + * @param relationPath the path storing the relation objects * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index c013a2bf6..85fb4a6b2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -49,9 +49,11 @@ public class CreateRelatedEntitiesJob_phase2 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index b3f785492..ae899c3d8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,14 +131,14 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) - .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); JavaRDD pruned = pruneRels( pruneRels( rels, - sourceMaxRelations, relPartitions, (Function) r -> r.getSource()), - targetMaxRelations, relPartitions, (Function) r -> r.getTarget()); + sourceMaxRelations, relPartitions, (Function) Relation::getSource), + targetMaxRelations, relPartitions, (Function) Relation::getTarget); spark .createDataset(pruned.rdd(), Encoders.bean(Relation.class)) .repartition(relPartitions) @@ -170,8 +170,8 @@ public class PrepareRelationsJob { .map( (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), Encoders.kryo(Relation.class)) - .filter((FilterFunction) rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter((FilterFunction) rel -> relationFilter.contains(rel.getRelClass()) == false) + .filter((FilterFunction) rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) Relation::getSource, Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 28c1111d6..01d161b6b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.oa.provision; public class ProvisionConstants { + private ProvisionConstants() { + } + public static final String LAYOUT = "index"; public static final String INTERPRETATION = "openaire"; public static final String SEPARATOR = "-"; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 410aff5ba..0033978bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -17,7 +17,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class SolrAdminApplication implements Closeable { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..b383f67ef 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -40,7 +39,7 @@ public class XmlConverterJob { private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class); - public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; + public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; public static void main(String[] args) throws Exception { @@ -95,7 +94,7 @@ public class XmlConverterJob { prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, + SCHEMA_LOCATION, otherDsTypeId); final List paths = HdfsSupport @@ -186,7 +185,7 @@ public class XmlConverterJob { accumulators .put( "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + sc.longAccumulator("organizationOrganization_dedup_merges")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index a321bdba9..e7dbdbd2b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -3,14 +3,12 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; -import javax.swing.text.html.Option; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamResult; @@ -163,7 +161,7 
@@ public class XmlIndexingJob { case HDFS: spark .createDataset( - docs.map(s -> new SerializableSolrInputDocument(s)).rdd(), + docs.map(SerializableSolrInputDocument::new).rdd(), Encoders.kryo(SerializableSolrInputDocument.class)) .write() .mode(SaveMode.Overwrite) @@ -174,14 +172,13 @@ public class XmlIndexingJob { } } - protected static String toIndexRecord(Transformer tr, final String record) { + protected static String toIndexRecord(Transformer tr, final String xmlRecord) { final StreamResult res = new StreamResult(new StringWriter()); try { - tr.transform(new StreamSource(new StringReader(record)), res); + tr.transform(new StreamSource(new StringReader(xmlRecord)), res); return res.getWriter().toString(); - } catch (Throwable e) { - log.error("XPathException on record: \n {}", record, e); - throw new IllegalArgumentException(e); + } catch (TransformerException e) { + throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e); } } @@ -192,8 +189,6 @@ public class XmlIndexingJob { * @param xslt xslt for building the index record transformer * @param fields the list of fields * @return the javax.xml.transform.Transformer - * @throws ISLookUpException could happen - * @throws IOException could happen * @throws TransformerException could happen */ protected static String getLayoutTransformer(String format, String fields, String xslt) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index 2eb9cf38b..0fb109fbb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.provision.model; import java.io.Serializable; -import java.util.ArrayList; import java.util.LinkedList; import 
java.util.List; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index c09ed86e5..d4ee24c14 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -11,6 +11,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class ProvisionModelSupport { + private ProvisionModelSupport() { + } + public static Class[] getModelClasses() { List> modelClasses = Lists.newArrayList(ModelSupport.getOafModelClasses()); modelClasses diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java index e15ceff76..5c78d1826 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java @@ -34,7 +34,6 @@ public class RelatedEntity implements Serializable { private Qualifier datasourcetype; private Qualifier datasourcetypeui; private Qualifier openairecompatibility; - // private String aggregatortype; // organization private String legalname; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java index 7391569ed..a91050403 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java +++ 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -5,7 +5,6 @@ import java.util.Comparator; import java.util.Optional; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index ac418f2b9..bcaf40603 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -9,6 +9,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -23,7 +24,7 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; public static ContextMapper fromIS(final String isLookupUrl) - throws DocumentException, ISLookUpException { + throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); StringBuilder sb = new StringBuilder(""); Joiner.on("").appendTo(sb, isLookUp.quickSearchProfile(XQUERY)); @@ -31,10 +32,12 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } - public static ContextMapper fromXml(final String xml) throws DocumentException { + public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper 
contextMapper = new ContextMapper(); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); for (Object o : doc.selectNodes("//entry")) { Node node = (Node) o; String id = node.valueOf("./@id"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index d2131ef28..3750d0173 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -8,16 +8,18 @@ import java.util.Set; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets + public static final Set authorPidTypes = Sets .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); + private GraphMappingUtils() { + } + public static String removePrefix(final String s) { if (s.contains("|")) return substringAfter(s, "|"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java index 29a51cb29..8c7c61361 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java @@ -25,11 +25,9 @@ public class ISLookupClient { * * @param format the Metadata format name * 
@return the string representation of the list of fields to be indexed - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ - public String getLayoutSource(final String format) - throws ISLookUpDocumentNotFoundException, ISLookUpException { + public String getLayoutSource(final String format) throws ISLookUpException { return doLookup( String .format( @@ -41,7 +39,6 @@ public class ISLookupClient { * Method retrieves from the information system the openaireLayoutToRecordStylesheet * * @return the string representation of the XSLT contained in the transformation rule profile - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ public String getLayoutTransformer() throws ISLookUpException { @@ -78,9 +75,9 @@ public class ISLookupClient { } private String doLookup(String xquery) throws ISLookUpException { - log.info(String.format("running xquery: %s", xquery)); + log.info("running xquery: {}", xquery); final String res = getIsLookup().getResourceProfileByQuery(xquery); - log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ...")); + log.info("got response (100 chars): {} ...", StringUtils.left(res, 100)); return res; } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java deleted file mode 100644 index 9dbac1936..000000000 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.oa.provision.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return 
-1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index f16ee260f..36028be9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -3,10 +3,10 @@ package eu.dnetlib.dhp.oa.provision.utils; import java.io.StringReader; import java.io.StringWriter; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Objects; import javax.xml.stream.*; import javax.xml.stream.events.Namespace; @@ -57,13 +57,13 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; private final ThreadLocal inputFactory = ThreadLocal - .withInitial(() -> 
XMLInputFactory.newInstance()); + .withInitial(XMLInputFactory::newInstance); private final ThreadLocal outputFactory = ThreadLocal - .withInitial(() -> XMLOutputFactory.newInstance()); + .withInitial(XMLOutputFactory::newInstance); private final ThreadLocal eventFactory = ThreadLocal - .withInitial(() -> XMLEventFactory.newInstance()); + .withInitial(XMLEventFactory::newInstance); private final String version; @@ -126,6 +126,8 @@ public class StreamingInputDocumentFactory { return indexDocument; } catch (XMLStreamException e) { throw new IllegalStateException(e); + } finally { + inputFactory.remove(); } } @@ -143,9 +145,9 @@ public class StreamingInputDocumentFactory { /** * Parse the targetFields block and add fields to the solr document. * - * @param indexDocument - * @param parser - * @throws XMLStreamException + * @param indexDocument the document being populated + * @param parser the XML parser + * @throws XMLStreamException when the parser cannot parse the XML */ protected void parseTargetFields( final SolrInputDocument indexDocument, final XMLEventReader parser) @@ -165,9 +167,10 @@ public class StreamingInputDocumentFactory { final XMLEvent text = parser.nextEvent(); String data = getText(text); - - addField(indexDocument, fieldName, data); - hasFields = true; + if (Objects.nonNull(data)) { + addField(indexDocument, fieldName, data); + hasFields = true; + } } } @@ -192,36 +195,43 @@ public class StreamingInputDocumentFactory { final List nsList, final String dnetResult) throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + final XMLEventFactory xmlEventFactory = this.eventFactory.get(); + try { - for (Namespace ns : nsList) { - eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); - } - - StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); - - // new root record - writer.add(newRecord); - - // copy the rest as it is - while 
(parser.hasNext()) { - final XMLEvent resultEvent = parser.nextEvent(); - - // TODO: replace with depth tracking instead of close tag tracking. - if (resultEvent.isEndElement() - && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { - writer.add(eventFactory.get().createEndElement("", null, RESULT)); - break; + for (Namespace ns : nsList) { + xmlEventFactory.createNamespace(ns.getPrefix(), ns.getNamespaceURI()); } - writer.add(resultEvent); + StartElement newRecord = xmlEventFactory.createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(xmlEventFactory.createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } finally { + outputFactory.remove(); + eventFactory.remove(); } - writer.close(); - indexDocument.addField(INDEX_RESULT, results.toString()); } /** - * Helper used to add a field to a solr doc. 
It avoids to add empy fields + * Helper used to add a field to a solr doc, avoids adding empty fields * * @param indexDocument * @param field @@ -231,7 +241,6 @@ public class StreamingInputDocumentFactory { final SolrInputDocument indexDocument, final String field, final String value) { String cleaned = value.trim(); if (!cleaned.isEmpty()) { - // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); indexDocument.addField(field.toLowerCase(), cleaned); } } @@ -243,9 +252,9 @@ public class StreamingInputDocumentFactory { * @return the */ protected final String getText(final XMLEvent text) { - if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + - // text.asEndElement().getName().getLocalPart()); + if (text.isEndElement()) { return ""; + } final String data = text.asCharacters().getData(); if (data != null && data.length() > MAX_FIELD_LENGTH) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 173ba326a..7487f0956 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -50,7 +50,7 @@ public class TemplateFactory { public String getChild(final String name, final String id, final List metadata) { return getTemplate(resources.getChild()) .add("name", name) - .add("hasId", !(id == null)) + .add("hasId", id != null) .add("id", id != null ? escapeXml(removePrefix(id)) : "") .add("metadata", metadata) .render(); @@ -103,7 +103,7 @@ public class TemplateFactory { (webresources != null ? 
webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) - .map(w -> getWebResource(w)) + .map(this::getWebResource) .collect(Collectors.toList())) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 2c8240290..adc8aca9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -25,8 +25,8 @@ import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; +import org.xml.sax.SAXException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; @@ -46,6 +46,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { + public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; private final Map accumulators; private final Set specialDatasourceTypes; @@ -56,8 +57,6 @@ public class XmlRecordFactory implements Serializable { private boolean indent = false; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, @@ -86,7 +85,6 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); - // final OafEntity entity = toOafEntity(je.getEntity()); OafEntity entity = je.getEntity(); TemplateFactory templateFactory = new TemplateFactory(); try { @@ -95,8 +93,7 @@ public class XmlRecordFactory implements Serializable { final List metadata = metadata(type, entity, contexts); // rels has to be 
processed before the contexts because they enrich the contextMap with - // the - // funding info. + // the funding info. final List links = je.getLinks(); final List relations = links .stream() @@ -122,34 +119,11 @@ public class XmlRecordFactory implements Serializable { } } - private static OafEntity parseOaf(final String json, final String type) { - try { - switch (EntityType.valueOf(type)) { - case publication: - return OBJECT_MAPPER.readValue(json, Publication.class); - case dataset: - return OBJECT_MAPPER.readValue(json, Dataset.class); - case otherresearchproduct: - return OBJECT_MAPPER.readValue(json, OtherResearchProduct.class); - case software: - return OBJECT_MAPPER.readValue(json, Software.class); - case datasource: - return OBJECT_MAPPER.readValue(json, Datasource.class); - case organization: - return OBJECT_MAPPER.readValue(json, Organization.class); - case project: - return OBJECT_MAPPER.readValue(json, Project.class); - default: - throw new IllegalArgumentException("invalid type: " + type); - } - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - private String printXML(String xml, boolean indent) { try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document doc = reader.read(new StringReader(xml)); OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); @@ -157,7 +131,7 @@ public class XmlRecordFactory implements Serializable { XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); - } catch (IOException | DocumentException e) { + } catch (IOException | DocumentException | SAXException e) { throw new IllegalArgumentException("Unable to indent XML. 
Invalid record:\n" + xml, e); } } @@ -203,7 +177,7 @@ public class XmlRecordFactory implements Serializable { final Result r = (Result) entity; if (r.getContext() != null) { - contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); + contexts.addAll(r.getContext().stream().map(Context::getId).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ if (contexts.contains("dh-ch::subcommunity::2")) { contexts.add("clarin"); @@ -260,14 +234,14 @@ public class XmlRecordFactory implements Serializable { .collect( Collectors .groupingBy( - p -> p.getValue(), + StructuredProperty::getValue, Collectors .mapping( p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) .values() .stream() - .map(op -> op.get()) + .map(Optional::get) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); @@ -938,7 +912,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .filter(Objects::nonNull) - .map(ft -> ft.getValue()) + .map(Field::getValue) .collect(Collectors.toList())); } @@ -1069,7 +1043,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .peek(ft -> fillContextMap(ft, contexts)) - .map(ft -> getRelFundingTree(ft)) + .map(XmlRecordFactory::getRelFundingTree) .collect(Collectors.toList())); } break; @@ -1112,7 +1086,7 @@ public class XmlRecordFactory implements Serializable { final List links = je.getLinks(); List children = links .stream() - .filter(link -> isDuplicate(link)) + .filter(this::isDuplicate) .map(link -> { final String targetType = link.getTarget().getType(); final String name = ModelSupport.getMainType(EntityType.valueOf(targetType)); @@ -1263,7 +1237,7 @@ public class XmlRecordFactory implements Serializable { return extraInfo != null ? 
extraInfo .stream() - .map(e -> XmlSerializationUtils.mapExtraInfo(e)) + .map(XmlSerializationUtils::mapExtraInfo) .collect(Collectors.toList()) : Lists.newArrayList(); } @@ -1287,9 +1261,6 @@ public class XmlRecordFactory implements Serializable { if (def == null) { continue; - // throw new IllegalStateException(String.format("cannot find context for id - // '%s'", - // id)); } if (def.getName().equals("context")) { @@ -1327,7 +1298,8 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; } catch (TransformerConfigurationException e) { @@ -1354,8 +1326,10 @@ public class XmlRecordFactory implements Serializable { Document fundingPath; try { - fundingPath = new SAXReader().read(new StringReader(xmlTree)); - } catch (final DocumentException e) { + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + fundingPath = reader.read(new StringReader(xmlTree)); + } catch (final DocumentException | SAXException e) { throw new RuntimeException(e); } try { @@ -1407,7 +1381,9 @@ public class XmlRecordFactory implements Serializable { protected static String getRelFundingTree(final String xmlTree) { String funding = ""; try { - final Document ftree = new SAXReader().read(new StringReader(xmlTree)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document ftree = reader.read(new StringReader(xmlTree)); funding = ""; funding += getFunderElement(ftree); @@ -1427,7 +1403,7 @@ public class XmlRecordFactory implements Serializable { + e.getName() + ">"; } - } catch (final DocumentException e) { + } catch (final DocumentException | SAXException e) { throw 
new IllegalArgumentException( "unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); } finally { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 8195467b1..213a62b32 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -11,9 +11,12 @@ public class XmlSerializationUtils { // XML 1.0 // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - private static final String xml10pattern = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + private static final String XML_10_PATTERN = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]"; + private XmlSerializationUtils() { + } + public static String mapJournal(Journal j) { final String attrs = new StringBuilder() .append(attr("issn", j.getIssnPrinted())) @@ -50,12 +53,12 @@ public class XmlSerializationUtils { public static String escapeXml(final String value) { return value - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">") - .replaceAll("\"", """) - .replaceAll("'", "'") - .replaceAll(xml10pattern, ""); + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("\"", """) + .replace("'", "'") + .replaceAll(XML_10_PATTERN, ""); } public static String parseDataInfo(final DataInfo dataInfo) { @@ -70,18 +73,6 @@ public class XmlSerializationUtils { .toString(); } - private static StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo info) { - return sb - .append( - attr("inferred", info.getInferred() != null ? 
info.getInferred().toString() : "")) - .append(attr("inferenceprovenance", info.getInferenceprovenance())) - .append( - attr( - "provenanceaction", - info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "")) - .append(attr("trust", info.getTrust())); - } - public static String mapKeyValue(final String name, final KeyValue kv) { return new StringBuilder() .append("<") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java index 6cec3ed53..903150ca8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java @@ -25,7 +25,7 @@ public class ZkServers { // quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire String urls = zkUrl; final Optional chRoot = Optional.of(SEPARATOR + StringUtils.substringAfterLast(zkUrl, SEPARATOR)); - if (chRoot.isPresent() && StringUtils.isNotBlank(chRoot.get())) { + if (StringUtils.isNotBlank(chRoot.get())) { log.debug(String.format("found zk chroot %s", chRoot)); urls = zkUrl.replace(chRoot.get(), ""); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index ffeb0995d..e5faccd0f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -45,6 +45,7 @@ public class DropAndCreateESIndex { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") Map clusterMap = new 
ObjectMapper().readValue(clusterJson, Map.class); final String ip = clusterMap.get(cluster).split(",")[0]; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java index 3f842ef34..dd08215d5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java @@ -44,6 +44,7 @@ public class SparkIndexCollectionOnES { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") final Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..9f022454b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; +import java.util.Objects; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +16,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import 
eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -35,7 +33,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; * * The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities. */ -public class IndexRecordTransformerTest { +class IndexRecordTransformerTest { public static final String VERSION = "2021-04-15T10:05:53Z"; public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; @@ -48,23 +46,23 @@ public class IndexRecordTransformerTest { } @Test - public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + void testPreBuiltRecordTransformation() throws IOException, TransformerException { + String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml"))); testRecordTransformation(record); } @Test - public void testPublicationRecordTransformation() throws IOException, TransformerException { + void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, XmlRecordFactoryTest.otherDsTypeId); Publication p = load("publication.json", Publication.class); Project pj = load("project.json", Project.class); Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -80,8 +78,9 @@ public class 
IndexRecordTransformerTest { } private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml"))); + String xslt = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); @@ -99,7 +98,7 @@ public class IndexRecordTransformerTest { private T load(String fileName, Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); + .readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index 6818cf6a5..c22a24185 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -62,7 +65,7 @@ public class PrepareRelationsJobTest { } @Test - public void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { + void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { final int maxRelations = 20; PrepareRelationsJob @@ 
-83,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - Assertions.assertEquals(maxRelations, out.count()); + assertEquals(maxRelations, out.count()); Dataset freq = out .toDF() @@ -97,13 +100,13 @@ public class PrepareRelationsJobTest { long participation = getRows(freq, PARTICIPATION).get(0).getAs("count"); long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); - Assertions.assertTrue(participation == outcome); - Assertions.assertTrue(outcome > affiliation); - Assertions.assertTrue(participation > affiliation); + assertEquals(outcome, participation); + assertTrue(outcome > affiliation); + assertTrue(participation > affiliation); - Assertions.assertEquals(7, outcome); - Assertions.assertEquals(7, participation); - Assertions.assertEquals(6, affiliation); + assertEquals(7, outcome); + assertEquals(7, participation); + assertEquals(6, affiliation); } protected List getRows(Dataset freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index f57b8dcaf..9d5bff3cf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -1,39 +1,42 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class SolrAdminApplicationTest extends SolrTest { +class SolrAdminApplicationTest extends SolrTest { @Test - public void testPing() throws Exception { + 
void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assertions.assertTrue(pingResponse.getStatus() == 0); + assertEquals(0, pingResponse.getStatus()); } @Test - public void testAdminApplication_DELETE() throws Exception { + void testAdminApplication_DELETE() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } @Test - public void testAdminApplication_COMMIT() throws Exception { + void testAdminApplication_COMMIT() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 72f28fdf2..dc0a40471 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -28,7 +28,6 @@ public class SortableRelationKeyTest { .map(r -> SortableRelationKey.create(r, r.getSource())) .sorted() .forEach( - it -> { try { System.out.println(mapper.writeValueAsString(it)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index 
d7bcb3185..6f1956578 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -89,7 +89,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_onSolr() throws Exception { + void testXmlIndexingJob_onSolr() throws Exception { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; @@ -112,7 +112,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_saveOnHDFS() throws Exception { + void testXmlIndexingJob_saveOnHDFS() throws Exception { final String ID_XPATH = "//header/*[local-name()='objIdentifier']"; String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 221049f90..d89b3b068 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -13,6 +13,7 @@ import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -21,7 +22,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import 
eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; -public class XmlRecordFactoryTest { +class XmlRecordFactoryTest { public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -37,11 +37,11 @@ public class XmlRecordFactoryTest { .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + void testXMLRecordFactory() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -72,11 +72,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -104,11 +104,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory 
xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -135,7 +135,7 @@ public class XmlRecordFactoryTest { } @Test - public void testEnermapsRecord() throws IOException, DocumentException { + void testEnermapsRecord() throws IOException, DocumentException, SAXException { String contextmap = "" + @@ -144,7 +144,7 @@ public class XmlRecordFactoryTest { ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Dataset d = OBJECT_MAPPER diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java index 5b2e6804b..93f1bc087 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java @@ -7,16 +7,8 @@ package eu.dnetlib.oa.graph.usagerawdata.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; -import java.util.Properties; -import org.apache.log4j.Logger; - -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. 
Zoupanos */ @@ -31,7 +23,9 @@ public abstract class ConnectDB { private static String dbImpalaUrl; private static String usageStatsDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -72,10 +66,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -97,10 +87,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java index e53709f1a..afd7f9807 100644 --- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java @@ -7,20 +7,14 @@ package eu.dnetlib.oa.graph.usagestatsbuild.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import 
java.util.Properties; import org.apache.log4j.Logger; -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -37,7 +31,9 @@ public abstract class ConnectDB { private static String usageStatsDBSchema; private static String usagestatsPermanentDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -94,10 +90,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -119,10 +111,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1);