diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java index 10a25fdc3..a642dab70 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -8,8 +8,6 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; /** * Generates oozie properties which were not provided from commandline. @@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { }; @Override - public void execute() throws MojoExecutionException, MojoFailureException { + public void execute() { if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { String generatedSandboxName = generateSandboxName( @@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { /** * Generates sandbox name from workflow source directory. * - * @param wfSourceDir + * @param wfSourceDir workflow source directory * @return generated sandbox name */ private String generateSandboxName(String wfSourceDir) { // utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); + List sandboxNameParts = new ArrayList<>(); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); ArrayUtils.reverse(tokens); if (tokens.length > 0) { for (String token : tokens) { for (String limiter : limiters) { if (limiter.equals(token)) { - return sandboxNameParts.size() > 0 + return !sandboxNameParts.isEmpty() ? 
StringUtils.join(sandboxNameParts.toArray()) : null; } } - if (sandboxNameParts.size() > 0) { + if (!sandboxNameParts.isEmpty()) { sandboxNameParts.add(0, File.separator); } sandboxNameParts.add(0, token); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index d195ca86e..e3cdf5a22 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected List getEscapeChars(String escapeChars) { List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); + List realTokens = new ArrayList<>(); for (String token : tokens) { String realToken = getRealToken(token); realTokens.add(realToken); @@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { * @return content */ protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); + List names = new ArrayList<>(properties.stringPropertyNames()); Collections.sort(names); StringBuilder sb = new StringBuilder(); if (!StringUtils.isBlank(comment)) { @@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { throws MojoExecutionException { try { String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8); } catch (IOException e) { throw new MojoExecutionException("Error creating properties file", e); } @@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected static final List getListFromCSV(String csv) { if (StringUtils.isBlank(csv)) { - return new ArrayList(); + return new ArrayList<>(); } - List list = new ArrayList(); + List list = new ArrayList<>(); String[] tokens = StringUtils.split(csv, ","); for (String token : tokens) { list.add(token.trim()); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index 4bfcd3b33..2ff6bea30 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ -public class GenerateOoziePropertiesMojoTest { +class GenerateOoziePropertiesMojoTest { private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach 
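The generateSandboxName(...) rewrite above only modernises the traversal (diamond operator, isEmpty()) without changing its behaviour. Below is a minimal, self-contained sketch of that derivation for reference; the limiter values are declared outside this hunk, so the ones used here (just "dhp") are an assumption chosen to match the expectations implied by the tests in this file.

// ---- illustrative sketch, not part of the patch ----
// Mirrors GenerateOoziePropertiesMojo.generateSandboxName after the cleanup above.
// LIMITERS is an assumption; the real field is defined outside this hunk.
import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;

public class SandboxNameSketch {

    private static final String[] LIMITERS = { "dhp" };

    static String generateSandboxName(String wfSourceDir) {
        // walk the path from the innermost directory outwards, stopping at a limiter
        List<String> parts = new ArrayList<>();
        String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
        ArrayUtils.reverse(tokens);
        for (String token : tokens) {
            for (String limiter : LIMITERS) {
                if (limiter.equals(token)) {
                    return parts.isEmpty() ? null : StringUtils.join(parts.toArray());
                }
            }
            if (!parts.isEmpty()) {
                parts.add(0, File.separator);
            }
            parts.add(0, token);
        }
        return null;
    }

    public static void main(String[] args) {
        // on a Unix-like filesystem: "eu/dnetlib/dhp/wf/transformers" -> "wf/transformers"
        System.out.println(generateSandboxName("eu/dnetlib/dhp/wf/transformers"));
        // "eu/dnetlib/dhp/" -> null, nothing is collected before the limiter is reached
        System.out.println(generateSandboxName("eu/dnetlib/dhp/"));
    }
}
// ---- end sketch ----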
- public void clearSystemProperties() { + void clearSystemProperties() { System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); } @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { + void testExecuteSandboxNameAlreadySet() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String sandboxName = "originalSandboxName"; @@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { + void testExecuteEmptyWorkflowSourceDir() throws Exception { // given String workflowSourceDir = ""; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { + void testExecuteNullSandboxNameGenerated() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecute() throws Exception { + void testExecute() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteWithoutRoot() throws Exception { + void testExecuteWithoutRoot() throws Exception { // given String workflowSourceDir = "wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 0b3ea9653..84b962b4b 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension; /** @author mhorst, claudio.atzori */ @ExtendWith(MockitoExtension.class) -public class WritePredefinedProjectPropertiesTest { +class WritePredefinedProjectPropertiesTest { @Mock private MavenProject mavenProject; @@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest { // ----------------------------------- TESTS --------------------------------------------- @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectProperties() throws Exception { + void testExecuteWithProjectProperties() throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test() - public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { + void 
testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { + void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromBlankLocation() { + void testExecuteIncludingPropertyKeysFromBlankLocation() { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { + void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { // given mojo.setQuiet(true); mojo.setIncludePropertyKeysFromFiles(new String[] { @@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidFile() { + void testExecuteIncludingPropertyKeysFromInvalidFile() { // given mojo.setIncludePropertyKeysFromFiles(new String[] { "invalid location" @@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { // given mojo.setIncludeEnvironmentVariables(true); @@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemProperties(@TempDir File testFolder) 
throws Exception { + void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey"; String value = "systemPropertyValue"; @@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) + void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey "; diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 4c7810c47..c057123b1 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,6 +117,11 @@ eu.dnetlib.dhp dhp-schemas + + + com.opencsv + opencsv + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java deleted file mode 100644 index c53b83561..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.application; - -import java.io.*; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.Maps; - -public class ApplicationUtils { - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java index 0429bc25d..72c1f6a5e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java @@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable { final StringWriter stringWriter = new StringWriter(); IOUtils.copy(gis, stringWriter); return stringWriter.toString(); - } catch (Throwable e) { - log.error("Wrong value to decompress:" + abstractCompressed); - throw new RuntimeException(e); + } catch (IOException e) { + log.error("Wrong value to decompress: {}", abstractCompressed); + throw new IllegalArgumentException(e); } } - public static String compressArgument(final String value) throws Exception { + public static String compressArgument(final String value) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(value.getBytes()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java index 7004112e4..f34326d67 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java @@ -9,9 +9,6 @@ public class OptionsParameter { private boolean paramRequired; private boolean compressed; - public OptionsParameter() { - } - public String getParamName() { return paramName; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java index 12937a197..fbbbffcbb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java @@ -34,7 +34,7 @@ public class ApiDescriptor { return params; } - public void setParams(final HashMap params) { + public void setParams(final Map params) { this.params = params; } diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad47..a62a0ac79 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -12,6 +12,9 @@ public class Constants { public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + private Constants() { + } + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); @@ -49,4 +52,10 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 44599eb83..8ceee5c8a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable { if (oInst.isPresent()) { out .setInstance( - oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); } @@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - 
if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e4417..d0909642c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + try (TarArchiveOutputStream ar = new TarArchiveOutputStream( + fileSystem.create(hdfsWritePath).getWrappedStream())) { - TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); - RemoteIterator fileStatusListIterator = fileSystem - .listFiles( - new Path(inputPath), true); + while (iterator.hasNext()) { + writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + } - while (fileStatusListIterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); } - - ar.close(); } public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index 0bc782ccb..d06544ae1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; +import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; @@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable { final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) - .map(r -> r.first()) + .map(FindIterable::first) .map(d -> d.getString("currentId")) 
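The MakeTarArchive.write(...) refactoring above moves the TarArchiveOutputStream into a try-with-resources block, so the archive stream is closed even if writing an entry fails, replacing the manual ar.close(). A reduced sketch of that shape follows; the original delegates per-file writing to writeCurrentFile, which lies outside this hunk, so the plain entry-copy shown here is an assumption made for illustration.

// ---- illustrative sketch, not part of the patch ----
// Same try-with-resources structure as the refactored MakeTarArchive.write.
import java.io.IOException;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;

public class TarArchiveSketch {

    static void writeTar(FileSystem fs, String inputPath, String outputPath, String dirName)
        throws IOException {
        Path target = new Path(outputPath);
        if (fs.exists(target)) {
            fs.delete(target, true);
        }
        // the archive stream is closed automatically, even on failure
        try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
            fs.create(target).getWrappedStream())) {
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(inputPath), true);
            while (it.hasNext()) {
                LocatedFileStatus fileStatus = it.next();
                TarArchiveEntry entry = new TarArchiveEntry(
                    dirName + "/" + fileStatus.getPath().getName());
                entry.setSize(fileStatus.getLen());
                ar.putArchiveEntry(entry);
                try (FSDataInputStream in = fs.open(fileStatus.getPath())) {
                    IOUtils.copyBytes(in, ar, 4096, false);
                }
                ar.closeArchiveEntry();
            }
        }
    }
}
// ---- end sketch ----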
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); @@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable { if (!Iterables.contains(client.listDatabaseNames(), dbName)) { final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); log.warn(err); - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } return client.getDatabase(dbName); } @@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable { String.format("Missing collection '%s' in database '%s'", collName, db.getName())); log.warn(err); if (abortIfMissing) { - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } else { return null; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 6e02ca614..91c6c1825 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -24,7 +24,6 @@ import com.google.common.hash.Hashing; */ public class PacePerson { - private static final String UTF8 = "UTF-8"; private List name = Lists.newArrayList(); private List surname = Lists.newArrayList(); private List fullname = Lists.newArrayList(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java similarity index 84% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java index c822a6723..c5926848e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.aggregation.common; +package eu.dnetlib.dhp.common.aggregation; import java.io.Closeable; import java.io.IOException; @@ -11,8 +11,6 @@ import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.utils.DHPUtils; @@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap implements C private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); - private MessageSender messageSender; + private transient MessageSender messageSender; public AggregatorReport() { } - public AggregatorReport(MessageSender messageSender) throws IOException { + public AggregatorReport(MessageSender messageSender) { this.messageSender = messageSender; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 1f267733d..3f5c6ad4a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -5,6 +5,9 @@ import java.io.*; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; @@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable { this.deposition_id = deposition_id; } - public 
ZenodoAPIClient(String urlString, String access_token) throws IOException { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(bucket + "/" + file_name) - .addHeader("Content-Type", "application/zip") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .build(); @@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(body) .build(); @@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id + "/actions/newversion") - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); @@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(url) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java index c03762693..c14af55b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java @@ -32,13 +32,13 @@ public class Creator { public static Creator newInstance(String name, String affiliation, String orcid) { Creator c = new Creator(); - if (!(name == null)) { + if (name != null) { c.name = name; } - if (!(affiliation == null)) { + if (affiliation != null) { c.affiliation = affiliation; } - if (!(orcid == null)) { + if (orcid != null) { c.orcid = orcid; } diff --git 
a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java index c7428de7d..509f444b9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java @@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo; import java.io.Serializable; -import net.minidev.json.annotate.JsonIgnore; - public class File implements Serializable { private String checksum; private String filename; private long filesize; private String id; - @JsonIgnore - // private Links links; - public String getChecksum() { return checksum; } @@ -46,13 +41,4 @@ public class File implements Serializable { this.id = id; } -// @JsonIgnore -// public Links getLinks() { -// return links; -// } -// -// @JsonIgnore -// public void setLinks(Links links) { -// this.links = links; -// } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java index 144d297e6..5d94c2f89 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; public class CollectorException extends Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java new file mode 100644 index 000000000..44e19142c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -0,0 +1,56 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +public class GetCSV { + + public static final char DEFAULT_DELIMITER = ','; + + private GetCSV() { + } + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER); + } + + public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath, + String modelClass, char delimiter) throws IOException, ClassNotFoundException { + + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) { + + final ObjectMapper mapper = new ObjectMapper(); + + @SuppressWarnings("unchecked") + final List lines = new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse(); + + for (Object line : lines) { + writer.write(mapper.writeValueAsString(line)); 
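GetCSV above is a new helper that parses CSV into beans with opencsv (hence the new opencsv dependency in dhp-common/pom.xml) and writes them to HDFS as JSON lines. The short usage sketch below exercises the same opencsv/Jackson combination; the bean, its column names, and the in-memory CSV are invented for illustration and stand in for the HDFS-backed writer used by GetCSV.

// ---- illustrative sketch, not part of the patch ----
// Parses CSV into annotated beans with CsvToBeanBuilder and emits one JSON object per row.
import java.io.StringReader;
import java.util.List;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.bean.CsvBindByName;
import com.opencsv.bean.CsvToBeanBuilder;

public class GetCsvUsageSketch {

    public static class ProjectRow {
        @CsvBindByName(column = "code")
        private String code;
        @CsvBindByName(column = "title")
        private String title;

        public String getCode() { return code; }
        public void setCode(String code) { this.code = code; }
        public String getTitle() { return title; }
        public void setTitle(String title) { this.title = title; }
    }

    public static void main(String[] args) throws Exception {
        String csv = "code,title\n123,Example project\n456,Another project\n";

        List<ProjectRow> rows = new CsvToBeanBuilder<ProjectRow>(new StringReader(csv))
            .withType(ProjectRow.class)
            .withSeparator(',')
            .build()
            .parse();

        // GetCSV writes each serialized bean as a line on HDFS; here we just print them
        ObjectMapper mapper = new ObjectMapper();
        for (ProjectRow row : rows) {
            System.out.println(mapper.writeValueAsString(row));
        }
    }
}
// ---- end sketch ----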
+ writer.newLine(); + } + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index ab0d5cc02..6fcec00dd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; /** * Bundles the http connection parameters driving the client behaviour. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java similarity index 86% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index a61e2032c..dd46ab1f4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; import static eu.dnetlib.dhp.utils.DHPUtils.*; @@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; +import eu.dnetlib.dhp.common.Constants; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -32,7 +33,7 @@ public class HttpConnector2 { private String responseType = null; - private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; public HttpConnector2() { this(new HttpClientParams()); @@ -112,6 +113,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); @@ -120,7 +132,7 @@ public class HttpConnector2 { if (is3xx(urlConn.getResponseCode())) { // REDIRECTS final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.info(String.format("The requested url has been moved to %s", newUrl)); + log.info("The requested url has been moved to {}", newUrl); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -140,14 +152,14 @@ public 
class HttpConnector2 { if (retryAfter > 0) { log .warn( - requestUrl + " - waiting and repeating request after suggested retry-after " - + retryAfter + " sec."); + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); backoffAndSleep(retryAfter * 1000); } else { log .warn( - requestUrl + " - waiting and repeating request after default delay of " - + getClientParams().getRetryDelay() + " sec."); + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); @@ -181,12 +193,12 @@ public class HttpConnector2 { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} - value: {}", e.getKey(), v); } } } @@ -204,7 +216,7 @@ public class HttpConnector2 { private int obtainRetryAfter(final Map> headerMap) { for (String key : headerMap.keySet()) { - if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java index 98dabf56a..af6926cc7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.common.rest; +import java.io.IOException; import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; @@ -23,17 +23,20 @@ public class DNetRestClient { private static final ObjectMapper mapper = new ObjectMapper(); + private DNetRestClient() { + } + public static T doGET(final String url, Class clazz) throws Exception { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet, clazz); } - public static String doGET(final String url) throws Exception { + public static String doGET(final String url) throws IOException { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet); } - public static String doPOST(final String url, V objParam) throws Exception { + public static String doPOST(final String url, V objParam) throws IOException { final HttpPost httpPost = new HttpPost(url); if (objParam != null) { @@ -45,25 +48,25 @@ public class DNetRestClient { return doHTTPRequest(httpPost); } - public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + public static T doPOST(final String url, V objParam, Class clazz) throws IOException { return mapper.readValue(doPOST(url, objParam), clazz); } - private static String doHTTPRequest(final HttpUriRequest r) throws Exception { - 
CloseableHttpClient client = HttpClients.createDefault(); + private static String doHTTPRequest(final HttpUriRequest r) throws IOException { + try (CloseableHttpClient client = HttpClients.createDefault()) { - log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); - log - .info( - "request headers: {}", - Arrays - .asList(r.getAllHeaders()) - .stream() - .map(h -> h.getName() + ":" + h.getValue()) - .collect(Collectors.joining(","))); + log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); + log + .info( + "request headers: {}", + Arrays + .asList(r.getAllHeaders()) + .stream() + .map(h -> h.getName() + ":" + h.getValue()) + .collect(Collectors.joining(","))); - CloseableHttpResponse response = client.execute(r); - return IOUtils.toString(response.getEntity().getContent()); + return IOUtils.toString(client.execute(r).getEntity().getContent()); + } } private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index a9daede8f..b3eb98d4f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -46,7 +46,7 @@ public class Vocabulary implements Serializable { } public VocabularyTerm getTerm(final String id) { - return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); + return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null); } protected void addTerm(final String id, final String name) { @@ -81,7 +81,6 @@ public class Vocabulary implements Serializable { .ofNullable(getTermBySynonym(syn)) .map(term -> getTermAsQualifier(term.getId())) .orElse(null); - // .orElse(OafMapperUtils.unknown(getId(), getName())); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index a89bb486f..d5f57849c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable { } vocs.addTerm(vocId, termId, termName); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable { .getTerms() .values() .stream() - .map(t -> t.getId()) + .map(VocabularyTerm::getId) .collect(Collectors.toCollection(HashSet::new)); } @@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable { return Optional .ofNullable(vocId) .map(String::toLowerCase) - .map(id -> vocs.containsKey(id)) + .map(vocs::containsKey) .orElse(false); } private void addSynonyms(final String vocId, final String termId, final String syn) { String id = Optional .ofNullable(vocId) - .map(s -> s.toLowerCase()) + .map(String::toLowerCase) .orElseThrow( - () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); + () -> new IllegalArgumentException( + String + .format( + "empty vocabulary id for [term:%s, synonym:%s]", 
termId, syn))); Optional .ofNullable(vocs.get(id)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java index f1107b4b8..c7a0b5f50 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.message; import java.io.Serializable; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -10,8 +9,8 @@ public class Message implements Serializable { private static final long serialVersionUID = 401753881204524893L; - public static String CURRENT_PARAM = "current"; - public static String TOTAL_PARAM = "total"; + public static final String CURRENT_PARAM = "current"; + public static final String TOTAL_PARAM = "total"; private MessageType messageType; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 7a8e55a6e..aea046203 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -19,6 +18,9 @@ public class AuthorMerger { private static final Double THRESHOLD = 0.95; + private AuthorMerger() { + } + public static List merge(List> authors) { authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); @@ -36,7 +38,8 @@ public class AuthorMerger { public static List mergeAuthor(final List a, final List b, Double threshold) { int pa = countAuthorsPids(a); int pb = countAuthorsPids(b); - List base, enrich; + List base; + List enrich; int sa = authorsSize(a); int sb = authorsSize(b); @@ -62,7 +65,7 @@ public class AuthorMerger { // (if an Author has more than 1 pid, it appears 2 times in the list) final Map basePidAuthorMap = base .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -74,7 +77,7 @@ public class AuthorMerger { // (list of pid that are missing in the other list) final List> pidToEnrich = enrich .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -117,9 +120,9 @@ public class AuthorMerger { } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() + : ""; + return (pid.getQualifier() != null ? classid : "") + (pid.getValue() != null ? 
pid.getValue().toLowerCase() : ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java index 9ac0a0bf7..fd4c0191a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java @@ -12,6 +12,9 @@ import com.ximpleware.VTDNav; /** Created by sandro on 9/29/16. */ public class VtdUtilityParser { + private VtdUtilityParser() { + } + public static List getTextValuesWithAttributes( final AutoPilot ap, final VTDNav vn, final String xpath, final List attributes) throws VtdException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 1d002ed7e..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { r .getAuthor() .stream() - .filter(a -> Objects.nonNull(a)) + .filter(Objects::nonNull) .filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .collect(Collectors.toList())); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index c6a8fd5a7..720fe47fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*; public class OafMapperUtils { + private OafMapperUtils() { + } + public static Oaf merge(final Oaf left, final Oaf right) { if (ModelSupport.isSubClass(left, OafEntity.class)) { return mergeEntities((OafEntity) left, (OafEntity) right); } else if (ModelSupport.isSubClass(left, Relation.class)) { ((Relation) left).mergeFrom((Relation) right); } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName()); } return left; } @@ -38,7 +41,7 @@ public class OafMapperUtils { } else if (ModelSupport.isSubClass(left, Project.class)) { left.mergeFrom(right); } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); } return left; } @@ -62,7 +65,7 @@ public class OafMapperUtils { public static List listKeyValues(final String... 
s) { if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)"); } final List list = new ArrayList<>(); @@ -88,7 +91,7 @@ public class OafMapperUtils { .stream(values) .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -97,7 +100,7 @@ public class OafMapperUtils { .stream() .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -342,10 +345,10 @@ public class OafMapperUtils { if (instanceList != null) { final Optional min = instanceList .stream() - .map(i -> i.getAccessright()) + .map(Instance::getAccessright) .min(new AccessRightComparator<>()); - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new); if (StringUtils.isBlank(rights.getClassid())) { rights.setClassid(UNKNOWN); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 8d760a2cd..6a86f30df 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -34,6 +34,9 @@ public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); + private DHPUtils() { + } + public static Seq toSeq(List list) { return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); } @@ -44,7 +47,7 @@ public class DHPUtils { md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { - System.err.println("Error creating id"); + log.error("Error creating id from {}", s); return null; } } @@ -53,33 +56,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String compressString(final String input) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - Base64OutputStream b64os = new Base64OutputStream(out)) { - GZIPOutputStream gzip = new GZIPOutputStream(b64os); - gzip.write(input.getBytes(StandardCharsets.UTF_8)); - gzip.close(); - return out.toString(); - } catch (Throwable e) { - return null; - } - } - - public static String decompressString(final String input) { - byte[] byteArray = Base64.decodeBase64(input.getBytes()); - int len; - try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); - ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { - byte[] buffer = new byte[1024]; - while ((len = gis.read(buffer)) != -1) { - bos.write(buffer, 0, len); - } - return bos.toString(); - } catch (Exception e) { - return null; - } - } - public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index b326c4159..8ae0bb5c3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -18,13 +18,16 @@ public class ISLookupClientFactory { private static final 
int requestTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10; + private ISLookupClientFactory() { + } + public static ISLookUpService getLookUpService(final String isLookupUrl) { return getServiceStub(ISLookUpService.class, isLookupUrl); } @SuppressWarnings("unchecked") private static T getServiceStub(final Class clazz, final String endpoint) { - log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); + log.info("creating {} stub from {}", clazz.getName(), endpoint); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setAddress(endpoint); @@ -38,12 +41,10 @@ public class ISLookupClientFactory { log .info( - String - .format( - "setting connectTimeout to %s, requestTimeout to %s for service %s", - connectTimeout, - requestTimeout, - clazz.getCanonicalName())); + "setting connectTimeout to {}, requestTimeout to {} for service {}", + connectTimeout, + requestTimeout, + clazz.getCanonicalName()); policy.setConnectionTimeout(connectTimeout); policy.setReceiveTimeout(requestTimeout); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java index 9b00b908c..81f1b5142 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java @@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException; public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { - public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; + public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public abstract String getName(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java index c7e311b02..1ea2b9f46 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java @@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(""); } final Item item = arguments[0].head(); @@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction { for (String format : dateFormats) { try { c.setTime(new SimpleDateFormat(format).parse(s)); - String year = String.valueOf(c.get(Calendar.YEAR)); - return year; + return String.valueOf(c.get(Calendar.YEAR)); } catch (ParseException e) { } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 1b5f3c40d..3e5def9b5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || 
arguments.length == 0) { return new StringValue(BLANK); } String s = arguments[0].head().getStringValue(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java index 46ecafd0a..b46a415d8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.utils.saxon; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import org.apache.commons.lang3.StringUtils; import net.sf.saxon.expr.XPathContext; @@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction { final String s1 = getValue(arguments[0]); final String s2 = getValue(arguments[1]); - return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); + final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : ""; + return new StringValue(value); } private String getValue(final Sequence arg) throws XPathException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java index b85d866f1..61049d2e1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java @@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl; public class SaxonTransformerFactory { + private SaxonTransformerFactory() { + } + /** * Creates the index record transformer from the given XSLT * diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java index e14020830..1788239f2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java @@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -public class ArgumentApplicationParserTest { +class ArgumentApplicationParserTest { @Test - public void testParseParameter() throws Exception { + void testParseParameter() throws Exception { final String jsonConfiguration = IOUtils .toString( this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java index 870943816..fa721d5e5 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java @@ -21,13 +21,13 @@ public class HdfsSupportTest { class Remove { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); } @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { + void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { // when HdfsSupport.remove(tempDir.toString(), new Configuration()); @@ -36,7 +36,7 @@ public class HdfsSupportTest { } @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { + void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) 
throws IOException { // given Path file = Files.createTempFile(tempDir, "p", "s"); @@ -52,13 +52,13 @@ public class HdfsSupportTest { class ListFiles { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); } @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { + void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java index 5ebd7213e..cb9ae2886 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; -public class PacePersonTest { +class PacePersonTest { @Test - public void pacePersonTest1() { + void pacePersonTest1() { PacePerson p = new PacePerson("Artini, Michele", false); assertEquals("Artini", p.getSurnameString()); @@ -17,7 +17,7 @@ public class PacePersonTest { } @Test - public void pacePersonTest2() { + void pacePersonTest2() { PacePerson p = new PacePerson("Michele G. Artini", false); assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Michele G", p.getNameString()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java index 2f01c0863..8fa966c2f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java @@ -18,7 +18,8 @@ public class SparkSessionSupportTest { class RunWithSparkSession { @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); @@ -37,7 +38,8 @@ public class SparkSessionSupportTest { } @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 9ae9c33c2..2ccaed3e4 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @Disabled -public class ZenodoAPIClientTest { +class ZenodoAPIClientTest { private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String ACCESS_TOKEN = ""; @@ -22,7 +22,7 @@ public class ZenodoAPIClientTest { private final String depositionId = "674915"; @Test - public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { + void testUploadOldDeposition() 
throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); @@ -44,7 +44,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -67,7 +67,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionNewName() throws IOException, MissingConceptDoiException { + void testNewVersionNewName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -87,7 +87,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionOldName() throws IOException, MissingConceptDoiException { + void testNewVersionOldName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java index 9c4e62214..3a7a41a1b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class AuthorMergerTest { +class AuthorMergerTest { private String publicationsBasePath; @@ -43,7 +43,7 @@ public class AuthorMergerTest { } @Test - public void mergeTest() { // used in the dedup: threshold set to 0.95 + void mergeTest() { // used in the dedup: threshold set to 0.95 for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8d519a93f..4068f0abb 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import me.xuender.unidecode.Unidecode; -public class OafMapperUtilsTest { +class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -42,7 +42,7 @@ public class OafMapperUtilsTest { } @Test - public void testDateValidation() { + void testDateValidation() { assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); @@ -147,44 +147,46 @@ public class OafMapperUtilsTest { } @Test - public void testDate() { - System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); + void testDate() { + final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); + assertNotNull(date); + System.out.println(date); } @Test - public void testMergePubs() throws IOException { + void testMergePubs() throws IOException { Publication p1 = read("publication_1.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class); Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d2 = read("dataset_2.json", 
Dataset.class); - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); + assertEquals(1, p1.getCollectedfrom().size()); + assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey()); + assertEquals(1, d2.getCollectedfrom().size()); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p1, d2) .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + .getClassid()); - assertEquals(p2.getCollectedfrom().size(), 1); + assertEquals(1, p2.getCollectedfrom().size()); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); + assertEquals(1, d1.getCollectedfrom().size()); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.DATASET_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p2, d1) .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + .getClassid()); } protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } protected T read(String filename, Class clazz) throws IOException { diff --git a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java index d1d1ada71..5743b0831 100644 --- a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java @@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation; import org.junit.jupiter.api.Test; -public class RelationMapperTest { +class RelationMapperTest { @Test - public void testLoadRels() throws Exception { + void testLoadRels() throws Exception { RelationMapper relationMapper = RelationMapper.load(); relationMapper.keySet().forEach(System.out::println); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 5a80c0b53..088e618c7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.*; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import 
eu.dnetlib.actionmanager.rmi.ActionManagerException; -import eu.dnetlib.actionmanager.set.ActionManagerSet; -import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes; -import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; public class ISClient implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); + private static final Logger log = LoggerFactory.getLogger(ISClient.class); private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private final ISLookUpService isLookup; + private final transient ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -63,7 +60,7 @@ public class ISClient implements Serializable { .map( sets -> sets .stream() - .map(set -> parseSetInfo(set)) + .map(ISClient::parseSetInfo) .filter(t -> ids.contains(t.getLeft())) .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) @@ -73,15 +70,17 @@ public class ISClient implements Serializable { } } - private Triple parseSetInfo(String set) { + private static Triple parseSetInfo(String set) { try { - Document doc = new SAXReader().read(new StringReader(set)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Document doc = reader.read(new StringReader(set)); return Triple .of( doc.valueOf("//SET/@id"), doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@latest")); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { throw new IllegalStateException(e); } } @@ -99,7 +98,7 @@ public class ISClient implements Serializable { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; - log.debug("quering for service property: " + q); + log.debug("querying for service property: {}", q); try { final List value = isLookup.quickSearchProfile(q); return Iterables.getOnlyElement(value); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java index fbb072957..eccfa445c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -62,6 +62,7 @@ public class MergeAndGet { x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); } + @SuppressWarnings("unchecked") private static G selectNewerAndGet(G x, A y) { if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7893fcf8b..c5f252c97 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++
b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); + @SuppressWarnings("unchecked") Class actionPayloadClazz = (Class) Class.forName(actionPayloadClassName); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); @@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob { return spark .read() .parquet(path) - .map((MapFunction) value -> extractPayload(value), Encoders.STRING()) + .map((MapFunction) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING()) .map( (MapFunction) value -> decodePayload(actionPayloadClazz, value), Encoders.bean(actionPayloadClazz)); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index f51c697f4..62eec13d5 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest { private ISClient isClient; @Test - public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { + void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { // given Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path outputDir = workingDir.resolve("output"); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index b2248d77a..4c88e9de3 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -20,7 +20,7 @@ public class MergeAndGetTest { class MergeFromAndGetStrategy { @Test - public void shouldThrowForOafAndOaf() { + void shouldThrowForOafAndOaf() { // given Oaf a = mock(Oaf.class); Oaf b = mock(Oaf.class); @@ -33,7 +33,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndRelation() { + void shouldThrowForOafAndRelation() { // given Oaf a = mock(Oaf.class); Relation b = mock(Relation.class); @@ -46,7 +46,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndOafEntity() { + void shouldThrowForOafAndOafEntity() { // given Oaf a = mock(Oaf.class); OafEntity b = mock(OafEntity.class); @@ -59,7 +59,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOaf() { + void shouldThrowForRelationAndOaf() { // given Relation a = mock(Relation.class); Oaf b = mock(Oaf.class); @@ -72,7 +72,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -85,7 +85,7 @@ public 
class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForRelationAndRelation() { + void shouldBehaveProperlyForRelationAndRelation() { // given Relation a = mock(Relation.class); Relation b = mock(Relation.class); @@ -101,7 +101,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOaf() { + void shouldThrowForOafEntityAndOaf() { // given OafEntity a = mock(OafEntity.class); Oaf b = mock(Oaf.class); @@ -114,7 +114,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -127,7 +127,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { + void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { // given class OafEntitySub1 extends OafEntity { } @@ -145,7 +145,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForOafEntityAndOafEntity() { + void shouldBehaveProperlyForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); OafEntity b = mock(OafEntity.class); @@ -165,7 +165,7 @@ public class MergeAndGetTest { class SelectNewerAndGetStrategy { @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -178,7 +178,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -191,7 +191,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndResult() { + void shouldThrowForOafEntityAndResult() { // given OafEntity a = mock(OafEntity.class); Result b = mock(Result.class); @@ -204,7 +204,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { + void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { // given // real types must be used because subclass-superclass resolution does not work for // mocks @@ -221,7 +221,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnLeftForOafEntityAndOafEntity() { + void shouldShouldReturnLeftForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(1L); @@ -238,7 +238,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnRightForOafEntityAndOafEntity() { + void shouldShouldReturnRightForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(2L); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index 79ab55e07..99ce961aa 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest { class Main { @Test - public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { + void 
shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { // given Class rowClazz = Relation.class; Class actionPayloadClazz = OafEntity.class; @@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest { @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") - public void shouldPromoteActionPayloadForGraphTable( + void shouldPromoteActionPayloadForGraphTable( MergeAndGet.Strategy strategy, Class rowClazz, Class actionPayloadClazz) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 477e4b204..cbc1bfaba 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest { class JoinTableWithActionPayloadAndMerge { @Test - public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { + void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { // given class OafImpl extends Oaf { } @@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { + void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { // given String id0 = "id0"; String id1 = "id1"; @@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { + void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { // given String id0 = "id0"; String id1 = "id1"; @@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest { class GroupTableByIdAndMerge { @Test - public void shouldRunProperly() { + void shouldRunProperly() { // given String id1 = "id1"; String id2 = "id2"; diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 294287008..98e22d8a3 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -84,14 +84,6 @@ json - - - org.apache.commons - commons-csv - 1.8 - - - org.apache.poi diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java index 4b9fd33f4..a48b84a33 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class CollectAndSave implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void 
main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); + Objects + .requireNonNull( + CollectAndSave.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index cea8c2891..f178451c1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable { private static void prepareResults(SparkSession spark, String inputPath, String outputPath, String bipScorePath, Class inputClazz) { - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD bipDeserializeJavaRDD = sc .textFile(bipScorePath) @@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - System.out.println(bipScores.count()); - Dataset results = readPath(spark, inputPath, inputClazz); results.createOrReplaceTempView("result"); @@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable { ret.setId(value._2().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) value -> value.getId(), Encoders.STRING()) + .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = new Result(); ret.setDataInfo(getDataInfo()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 92a870e37..bae41b218 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -8,14 +8,15 @@ import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException -abstract class AbstractRestClient extends Iterator[String]{ + +abstract class AbstractRestClient extends Iterator[String] { var buffer: List[String] = List() - var current_index:Int = 0 + var current_index: Int = 0 var scroll_value: Option[String] = None - var complete:Boolean = false + var complete: Boolean = false def extractInfo(input: String): Unit @@ -23,13 +24,13 @@ abstract class AbstractRestClient extends Iterator[String]{ protected def getBufferData(): Unit - def doHTTPGETRequest(url:String): String = { + def doHTTPGETRequest(url: String): String = { val httpGet = new HttpGet(url) doHTTPRequest(httpGet) } - def doHTTPPOSTRequest(url:String, json:String): String = { + def doHTTPPOSTRequest(url: String, json: String): String = { val httpPost 
= new HttpPost(url) if (json != null) { val entity = new StringEntity(json) @@ -46,7 +47,7 @@ abstract class AbstractRestClient extends Iterator[String]{ override def next(): String = { - val next_item:String = buffer(current_index) + val next_item: String = buffer(current_index) current_index = current_index + 1 if (current_index == buffer.size) getBufferData() @@ -54,17 +55,16 @@ abstract class AbstractRestClient extends Iterator[String]{ } - - - private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ + private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { val timeout = 60; // seconds val config = RequestConfig.custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) .setSocketTimeout(timeout * 1000).build() - val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() - var tries = 4 - while (tries > 0) { + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { println(s"requesting ${r.getURI}") try { val response = client.execute(r) @@ -78,10 +78,15 @@ abstract class AbstractRestClient extends Iterator[String]{ case e: Throwable => println(s"Error on requesting ${r.getURI}") e.printStackTrace() - tries-=1 + tries -= 1 } } "" - } + } finally { + if (client != null) + client.close() + } + } + getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala new file mode 100644 index 000000000..0b459fcf6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.actionmanager.datacite + + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.LocalFileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.apache.spark.sql.functions.max +import org.slf4j.{Logger, LoggerFactory} + +import java.text.SimpleDateFormat +import java.util.{Date, Locale} +import scala.io.Source + +object SparkDownloadUpdateDatacite { + val log: Logger = LoggerFactory.getLogger(getClass) + + def main(args: Array[String]): Unit = { + + val conf = new SparkConf + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString) + parser.parseArgument(args) + val master = parser.get("master") + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + val hdfsuri = parser.get("namenode") + log.info(s"namenode is $hdfsuri") + + + val spark: SparkSession = SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result] + + import spark.implicits._ + + + val maxDate:String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val ISO8601FORMAT = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val string_to_date =ISO8601FORMAT.parse(maxDate) + val ts = string_to_date.getTime + + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 9e852eb77..686b0fc7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; @@ -171,26 +171,23 @@ public class PrepareProgramme { } private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { - if (!a.getLanguage().equals("en")) { - if (b.getLanguage().equalsIgnoreCase("en")) { - a.setTitle(b.getTitle()); - a.setLanguage(b.getLanguage()); - } + if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) { + a.setTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); } - if (StringUtils.isEmpty(a.getShortTitle())) { - if (!StringUtils.isEmpty(b.getShortTitle())) { - a.setShortTitle(b.getShortTitle()); - } + if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) { + a.setShortTitle(b.getShortTitle()); } return a; } + @SuppressWarnings("unchecked") private static List prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map( value -> new Tuple2<>(value.getCode(), - new Tuple2(value.getTitle(), value.getShortTitle()))) + new Tuple2<>(value.getTitle(), value.getShortTitle()))) .collect() .toArray(); @@ -216,7 +213,7 @@ public class PrepareProgramme { String[] tmp = ent.split("\\."); if (tmp.length <= 2) { if (StringUtils.isEmpty(entry._2()._2())) { - map.put(entry._1(), new Tuple2(entry._2()._1(), entry._2()._1())); + map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1())); } else { map.put(entry._1(), entry._2()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index cecd537ba..8efc76a0e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; @@ -29,7 +29,7 @@ import scala.Tuple2; */ public class PrepareProjects { - private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws 
Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 2bba9fb60..2cc20cb15 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient; */ public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + + private static final String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; + + private final DbClient dbClient; private final Configuration conf; private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { - final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + final Consumer consumer = rs -> producer.apply(rs).forEach(this::writeProject); dbClient.processResults(sql, consumer); } @@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable { public ReadProjectsFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index fdc12c662..cc1411b31 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; -import java.util.HashMap; import java.util.Objects; import java.util.Optional; @@ -22,15 +21,16 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; -import 
eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; -import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Programme; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -47,13 +47,10 @@ import scala.Tuple2; * * To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more * than one programme. - * - * */ public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -137,7 +134,6 @@ public class SparkAtomicActionJob { h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); - // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); return pp; @@ -152,20 +148,16 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); - if (op.isPresent()) { - rp.setH2020topicdescription(op.get().getTitle()); - } + op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); return rp; }, Encoders.bean(Project.class)) .filter(Objects::nonNull) .groupByKey( - (MapFunction) p -> p.getId(), + (MapFunction) OafEntity::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (s, it) -> { Project first = it.next(); - it.forEachRemaining(p -> { - first.mergeFrom(p); - }); + it.forEachRemaining(first::mergeFrom); return first; }, Encoders.bean(Project.class)) .toJavaRDD() @@ -189,12 +181,6 @@ public class SparkAtomicActionJob { h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); } -// private static void setProgramme(H2020Classification h2020Classification, String classification) { -// String[] tmp = classification.split(" \\| "); -// -// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); -// } - public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java deleted file mode 100644 index 8bdce903b..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ /dev/null @@ -1,40 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.apache.commons.csv.CSVFormat; -import 
org.apache.commons.csv.CSVRecord; -import org.apache.commons.lang.reflect.FieldUtils; - -/** - * Reads a generic csv and maps it into classes that mirror its schema - */ -public class CSVParser { - - public List parse(String csvFile, String classForName) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - final CSVFormat format = CSVFormat.EXCEL - .withHeader() - .withDelimiter(';') - .withQuote('"') - .withTrim(); - List ret = new ArrayList<>(); - final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); - final Set headers = parser.getHeaderMap().keySet(); - Class clazz = Class.forName(classForName); - for (CSVRecord csvRecord : parser.getRecords()) { - final Object cc = clazz.newInstance(); - for (String header : headers) { - FieldUtils.writeField(cc, header, csvRecord.get(header), true); - - } - ret.add((R) cc); - } - - return ret; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java deleted file mode 100644 index 268d5f28c..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java +++ /dev/null @@ -1,200 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.Serializable; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - private String rcn; - private String id; - private String acronym; - private String status; - private String programme; - private String topics; - private String frameworkProgramme; - private String title; - private String startDate; - private String endDate; - private String projectUrl; - private String objective; - private String totalCost; - private String ecMaxContribution; - private String call; - private String fundingScheme; - private String coordinator; - private String coordinatorCountry; - private String participants; - private String participantCountries; - private String subjects; - - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - - public String getFrameworkProgramme() { - return frameworkProgramme; - } - - public void setFrameworkProgramme(String frameworkProgramme) { - this.frameworkProgramme = frameworkProgramme; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartDate() { - return startDate; - } - - public void setStartDate(String startDate) { - this.startDate = startDate; - } - - public String getEndDate() { - return endDate; - } - - public void setEndDate(String endDate) { - this.endDate = endDate; - } - - public String getProjectUrl() { - return projectUrl; - } - - public void 
setProjectUrl(String projectUrl) { - this.projectUrl = projectUrl; - } - - public String getObjective() { - return objective; - } - - public void setObjective(String objective) { - this.objective = objective; - } - - public String getTotalCost() { - return totalCost; - } - - public void setTotalCost(String totalCost) { - this.totalCost = totalCost; - } - - public String getEcMaxContribution() { - return ecMaxContribution; - } - - public void setEcMaxContribution(String ecMaxContribution) { - this.ecMaxContribution = ecMaxContribution; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getFundingScheme() { - return fundingScheme; - } - - public void setFundingScheme(String fundingScheme) { - this.fundingScheme = fundingScheme; - } - - public String getCoordinator() { - return coordinator; - } - - public void setCoordinator(String coordinator) { - this.coordinator = coordinator; - } - - public String getCoordinatorCountry() { - return coordinatorCountry; - } - - public void setCoordinatorCountry(String coordinatorCountry) { - this.coordinatorCountry = coordinatorCountry; - } - - public String getParticipants() { - return participants; - } - - public void setParticipants(String participants) { - this.participants = participants; - } - - public String getParticipantCountries() { - return participantCountries; - } - - public void setParticipantCountries(String participantCountries) { - this.participantCountries = participantCountries; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5f5b61d8b..a520176f4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; + /** * Reads a generic excel file and maps it into classes that mirror its schema */ @@ -26,52 +28,52 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - OPCPackage pkg = OPCPackage.open(file); - XSSFWorkbook wb = new XSSFWorkbook(pkg); + try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) { - XSSFSheet sheet = wb.getSheet(sheetName); - - if (sheetName == null) { - throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); - } - - List ret = new ArrayList<>(); - - DataFormatter dataFormatter = new DataFormatter(); - Iterator rowIterator = sheet.rowIterator(); - List headers = new ArrayList<>(); - int count = 0; - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (count == 0) { - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - headers.add(dataFormatter.formatCellValue(cell)); - } - } else { - Class clazz = Class.forName(classForName); - final Object cc = clazz.newInstance(); - - for (int i = 0; i < 
headers.size(); i++) { - Cell cell = row.getCell(i); - FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); - - } - - EXCELTopic et = (EXCELTopic) cc; - if (StringUtils.isNotBlank(et.getRcn())) { - ret.add((R) cc); - } + XSSFSheet sheet = wb.getSheet(sheetName); + if (sheet == null) { + throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } - count += 1; - } + List ret = new ArrayList<>(); - return ret; + DataFormatter dataFormatter = new DataFormatter(); + Iterator rowIterator = sheet.rowIterator(); + List headers = new ArrayList<>(); + int count = 0; + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + if (count == 0) { + Iterator cellIterator = row.cellIterator(); + + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + headers.add(dataFormatter.formatCellValue(cell)); + } + } else { + Class clazz = Class.forName(classForName); + final Object cc = clazz.newInstance(); + + for (int i = 0; i < headers.size(); i++) { + Cell cell = row.getCell(i); + FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); + + } + + EXCELTopic et = (EXCELTopic) cc; + if (StringUtils.isNotBlank(et.getRcn())) { + ret.add((R) cc); + } + + } + + count += 1; + } + + return ret; + } } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d..c967d4cae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -1,34 +1,21 @@ package eu.dnetlib.dhp.actionmanager.project.utils; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; +import java.io.*; +import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of a csv file and writes the Serialization of it in hdfs */ -public class ReadCSV implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final String csvFile; +public class ReadCSV { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -44,56 +31,22 @@ public class ReadCSV implements Closeable { final String hdfsPath = parser.get("hdfsPath"); final String hdfsNameNode = parser.get("hdfsNameNode"); final String classForName = parser.get("classForName"); + Optional delimiter = Optional.ofNullable(parser.get("delimiter")); + char del = ';'; + if (delimiter.isPresent()) + del = delimiter.get().charAt(0); - try (final 
ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); - log.info("Getting CSV file..."); - readCSV.execute(classForName); + FileSystem fileSystem = FileSystem.get(conf); + BufferedReader reader = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))); - } - } + GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del); - public void execute(final String classForName) throws Exception { - CSVParser csvParser = new CSVParser(); - csvParser - .parse(csvFile, classForName) - .stream() - .forEach(p -> write(p)); + reader.close(); } - @Override - public void close() throws IOException { - writer.close(); - } - - public ReadCSV( - final String hdfsPath, - final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - this.csvFile = httpConnector.getInputSource(fileURL); - } - - protected void write(final Object p) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(p)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681c..9e73cbc37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs */ public class ReadExcel implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private static final Log log = LogFactory.getLog(ReadExcel.class); + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final InputStream excelFile; @@ -31,7 +33,7 @@ public class ReadExcel implements Closeable { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - ReadCSV.class + ReadExcel.class .getResourceAsStream( "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); @@ -51,13 +53,15 @@ public class ReadExcel implements Closeable { } } - public void execute(final String classForName, final String sheetName) throws Exception { 
+ public void execute(final String classForName, final String sheetName) + throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException, + InstantiationException { + EXCELParser excelParser = new EXCELParser(); excelParser .parse(excelFile, classForName, sheetName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -68,20 +72,20 @@ public class ReadExcel implements Closeable { public ReadExcel( final String hdfsPath, final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + final String fileURL) throws CollectorException, IOException { + + final Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.excelFile = httpConnector.getInputSourceAsStream(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java similarity index 79% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index d486f0104..df06fd6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -1,20 +1,32 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; +import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; + /** * The model for the programme csv file */ public class CSVProgramme implements Serializable { - private String rcn; + @CsvBindByName(column = "code") private String code; + @CsvBindByName(column = "title") private String title; + + @CsvBindByName(column = "shortTitle") private String shortTitle; + + @CsvBindByName(column = "language") private String language; + + @CsvIgnore private String classification; + + @CsvIgnore private String classification_short; public String getClassification_short() { @@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable { this.classification = classification; } - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - public String getCode() { return code; } @@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable { this.language = language; } -// } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java 
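The CSVProgramme model above moves from hand-rolled parsing to OpenCSV's annotation-driven binding (@CsvBindByName, @CsvIgnore). A standalone sketch of how such a bean can be materialised with OpenCSV's CsvToBeanBuilder, independent of the GetCSV helper; the class name, the two-line sample and its values are made up for illustration, while the ';' separator matches the CORDIS files used in the tests:

    import java.io.Reader;
    import java.io.StringReader;
    import java.util.List;

    import com.opencsv.bean.CsvToBeanBuilder;

    import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;

    public class OpenCsvBindingSketch {

        public static void main(String[] args) {
            // header row plus one record; column names are matched against @CsvBindByName(column = "...")
            Reader reader = new StringReader(
                "code;title;shortTitle;language\n"
                    + "H2020-EU.1.1.;European Research Council;ERC;en\n");

            List<CSVProgramme> programmes = new CsvToBeanBuilder<CSVProgramme>(reader)
                .withType(CSVProgramme.class)
                .withSeparator(';')
                .build()
                .parse();

            programmes.forEach(p -> System.out.println(p.getCode() + " -> " + p.getTitle()));
        }
    }

Fields marked @CsvIgnore (classification, classification_short) are simply skipped during binding, which is presumably why they stay on the bean without a matching column.
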
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java new file mode 100644 index 000000000..3ddb19636 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -0,0 +1,46 @@ + +package eu.dnetlib.dhp.actionmanager.project.utils.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java index 5607df118..fa2e3422e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index d6c17e415..869e1cb68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; @@ -74,7 +75,7 @@ public class GenerateRorActionSetJob { final String jsonConfiguration = IOUtils .toString( - SparkAtomicActionJob.class + GenerateRorActionSetJob.class .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -108,7 +109,7 @@ public class GenerateRorActionSetJob { private static void processRorOrganizations(final SparkSession spark, final String inputPath, - final String outputPath) throws Exception { + final String outputPath) throws IOException { readInputPath(spark, inputPath) .map( @@ -203,7 +204,7 @@ public class GenerateRorActionSetJob { private static Dataset 
readInputPath( final SparkSession spark, - final String path) throws Exception { + final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java index b566a5501..a5acea5ae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Address implements Serializable { + private static final long serialVersionUID = 2444635485253443195L; + @JsonProperty("lat") private Float lat; @@ -37,8 +39,6 @@ public class Address implements Serializable { @JsonProperty("line") private String line; - private final static long serialVersionUID = 2444635485253443195L; - public Float getLat() { return lat; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java index 3dab60a9f..1e7621f98 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Country implements Serializable { + private static final long serialVersionUID = 4357848706229493627L; + @JsonProperty("country_code") private String countryCode; @JsonProperty("country_name") private String countryName; - private final static long serialVersionUID = 4357848706229493627L; - public String getCountryCode() { return countryCode; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java index 406bfd82c..5ea419b4e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java @@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable { private String preferred; - private final static long serialVersionUID = 2616688352998387611L; + private static final long serialVersionUID = 2616688352998387611L; public ExternalIdType() { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java index 3fd0c9250..a744a325f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java @@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode; public class ExternalIdTypeDeserializer extends JsonDeserializer { @Override - public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) - throws IOException, 
JsonProcessingException { + public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { final ObjectCodec oc = p.getCodec(); final JsonNode node = oc.readTree(p); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java index 9616db447..9317a777c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java @@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable { @JsonProperty("code") private String code; - private final static long serialVersionUID = 7294958526269195673L; + private static final long serialVersionUID = 7294958526269195673L; public String getAsciiName() { return asciiName; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java index 2b0487168..b13d64b10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java @@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable { @JsonProperty("license") private License license; - private final static long serialVersionUID = -8389480201526252955L; + private static final long serialVersionUID = -8389480201526252955L; public NameAndCode getNutsLevel2() { return nutsLevel2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java index 61eb0339d..9a2cb39e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java @@ -13,7 +13,7 @@ public class Label implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = -6576156103297850809L; + private static final long serialVersionUID = -6576156103297850809L; public String getIso639() { return iso639; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java index bdc8f4c42..a0f6cf774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java @@ -13,7 +13,7 @@ public class License implements Serializable { @JsonProperty("license") private String license; - private final static long serialVersionUID = -194308261058176439L; + private static final long serialVersionUID = -194308261058176439L; public String getAttribution() { return attribution; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java index 61d7eb8e6..c0f5d7645 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class NameAndCode implements Serializable { + private static final long serialVersionUID = 5459836979206140843L; + @JsonProperty("name") private String name; @JsonProperty("code") private String code; - private final static long serialVersionUID = 5459836979206140843L; - public String getName() { return name; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java index 8b73db98f..db9f96445 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Relationship implements Serializable { + private static final long serialVersionUID = 7847399503395576960L; + @JsonProperty("type") private String type; @@ -16,8 +18,6 @@ public class Relationship implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = 7847399503395576960L; - public String getType() { return type; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java index 94de34fee..b8041cfdf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class RorOrganization implements Serializable { + private static final long serialVersionUID = -2658312087616043225L; + @JsonProperty("ip_addresses") private List ipAddresses = new ArrayList<>(); @@ -59,8 +61,6 @@ public class RorOrganization implements Serializable { @JsonProperty("status") private String status; - private final static long serialVersionUID = -2658312087616043225L; - public List getIpAddresses() { return ipAddresses; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 9926f1688..dbf053a72 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -6,6 +6,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; + public abstract class ReportingJob { /** @@ -22,7 +24,7 @@ public abstract class ReportingJob { protected final AggregatorReport report; - public ReportingJob(AggregatorReport report) { + protected ReportingJob(AggregatorReport report) { this.report = report; } diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 65e7805d8..bab44a3b1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -25,7 +25,7 @@ public class MDStoreActionNode { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK } - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; @@ -70,7 +70,7 @@ public class MDStoreActionNode { if (StringUtils.isBlank(hdfsuri)) { throw new IllegalArgumentException("missing or empty argument namenode"); } - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { @@ -94,7 +94,7 @@ public class MDStoreActionNode { break; } case ROLLBACK: { - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 23ee3e2c6..2ea3f35cc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; @@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; public class CollectorWorker extends ReportingJob { @@ -116,7 +118,7 @@ public class CollectorWorker extends ReportingJob { final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) - .get(); + .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: diff --git 
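In CollectorWorker the bare Optional.get() on other_plugin_type becomes orElseThrow, so a missing parameter now fails with an explicit message instead of a NoSuchElementException. A reduced sketch of the pattern; the enum below is a stand-in for CollectorPlugin.NAME.OTHER_NAME, not the real type:

    import java.util.Collections;
    import java.util.Map;
    import java.util.Optional;

    public class PluginSelectionSketch {

        enum OtherPlugin {
            mdstore_mongodb_dump, mdstore_mongodb
        }

        static OtherPlugin resolve(Map<String, String> apiParams) {
            // valueOf already rejects unknown names; orElseThrow covers the absent key
            return Optional
                .ofNullable(apiParams.get("other_plugin_type"))
                .map(OtherPlugin::valueOf)
                .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
        }

        public static void main(String[] args) {
            System.out.println(resolve(Collections.singletonMap("other_plugin_type", "mdstore_mongodb_dump")));
        }
    }
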
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java index 545cbab0c..708d2da65 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java @@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index f6fdc266e..f1f74b09e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob { totalItems.add(1); try { SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Node node = document.selectSingleNode(xpath); final String originalIdentifier = node.getText(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 457f63468..841d42fea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public interface CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index a27314983..ad28a7261 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -11,15 +11,13 @@ import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; import com.mongodb.client.MongoCollection; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import 
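The one-line addition in GenerateNativeStoreSparkJob disables DOCTYPE declarations on the dom4j SAXReader, the standard guard against XXE when parsing harvested records. A self-contained sketch of the hardened parse; the sample record and xpath are illustrative only:

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;

    import org.dom4j.Document;
    import org.dom4j.Node;
    import org.dom4j.io.SAXReader;

    public class HardenedSaxReaderSketch {

        public static void main(String[] args) throws Exception {
            String input = "<record><header><identifier>oai:example:1</identifier></header></record>";
            String xpath = "//*[local-name()='identifier']";

            SAXReader reader = new SAXReader();
            // any document carrying a DOCTYPE declaration is rejected outright (XXE mitigation)
            reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

            Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
            Node node = document.selectSingleNode(xpath);
            System.out.println(node.getText());  // oai:example:1
        }
    }
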
eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.common.MdstoreClient; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public class MDStoreCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index 3199af5b7..036404141 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.utils.DHPUtils; public class MongoDbDumpCollectorPlugin implements CollectorPlugin { @@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public static final String PATH_PARAM = "path"; public static final String BODY_JSONPATH = "$.body"; - public FileSystem fileSystem; + private final FileSystem fileSystem; public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { this.fileSystem = fileSystem; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 9918e4abe..2d04b2574 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -13,11 +13,11 @@ import com.google.common.base.Splitter; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; public class OaiCollectorPlugin implements CollectorPlugin { @@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { } if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + 
untilDate); } final Iterator> iters = sets diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 4b254c0ef..566c6b216 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.io.IOException; -import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -16,21 +15,21 @@ import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.collection.XmlCleaner; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIterator implements Iterator { private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); - private final static String REPORT_PREFIX = "oai:"; + private static final String REPORT_PREFIX = "oai:"; + public static final String UTF_8 = "UTF-8"; private final Queue queue = new PriorityBlockingQueue<>(); @@ -68,7 +67,7 @@ public class OaiIterator implements Iterator { try { this.token = firstPage(); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } @@ -90,7 +89,7 @@ public class OaiIterator implements Iterator { try { token = otherPages(token); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } return res; @@ -99,23 +98,24 @@ public class OaiIterator implements Iterator { @Override public void remove() { + throw new UnsupportedOperationException(); } private String firstPage() throws CollectorException { try { - String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8); if (set != null && !set.isEmpty()) { - url += "&set=" + URLEncoder.encode(set, "UTF-8"); + url += "&set=" + URLEncoder.encode(set, UTF_8); } if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); + url += "&from=" + URLEncoder.encode(fromDate, UTF_8); } if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); + url += "&until=" + URLEncoder.encode(untilDate, UTF_8); } - log.info("Start harvesting using url: " + url); + log.info("Start harvesting using url: {}", url); return downloadPage(url); } catch (final UnsupportedEncodingException e) { @@ -143,7 +143,7 @@ public class OaiIterator implements Iterator { return downloadPage( baseUrl + "?verb=ListRecords&resumptionToken=" - + 
URLEncoder.encode(resumptionToken, "UTF-8")); + + URLEncoder.encode(resumptionToken, UTF_8)); } catch (final UnsupportedEncodingException e) { report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java index 48f6a94c8..1838223c2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.util.Iterator; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; -import eu.dnetlib.dhp.collection.HttpClientParams; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; public class OaiIteratorFactory { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java index be2bbcece..997948687 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java @@ -9,11 +9,11 @@ import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * TODO: delegate HTTP requests to the common HttpConnector2 implementation. diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 764c21fc2..64a041fd4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -30,9 +30,9 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.collection.JsonUtils; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * log.info(...) equal to log.trace(...) 
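The OaiIterator hunks above replace the repeated "UTF-8" literals with a single UTF_8 constant, switch to parameterized logging, make remove() throw UnsupportedOperationException and wrap CollectorException in IllegalStateException. For context, a reduced sketch of how the ListRecords request URLs are assembled; the date-format checks of the original are omitted here, and the base URL, set and token values are placeholders:

    import java.io.UnsupportedEncodingException;
    import java.net.URLEncoder;

    public class OaiUrlSketch {

        public static final String UTF_8 = "UTF-8";

        static String firstPage(String baseUrl, String mdFormat, String set, String fromDate, String untilDate)
            throws UnsupportedEncodingException {
            String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8);
            if (set != null && !set.isEmpty()) {
                url += "&set=" + URLEncoder.encode(set, UTF_8);
            }
            if (fromDate != null) {
                url += "&from=" + URLEncoder.encode(fromDate, UTF_8);
            }
            if (untilDate != null) {
                url += "&until=" + URLEncoder.encode(untilDate, UTF_8);
            }
            return url;
        }

        static String otherPages(String baseUrl, String resumptionToken) throws UnsupportedEncodingException {
            return baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, UTF_8);
        }

        public static void main(String[] args) throws UnsupportedEncodingException {
            System.out.println(firstPage("http://example.org/oai", "oai_dc", "openaire", "2021-01-01", null));
            System.out.println(otherPages("http://example.org/oai", "token-123"));
        }
    }
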
in the application-logs @@ -131,7 +131,8 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { - transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); xpath = XPathFactory.newInstance().newXPath(); @@ -142,7 +143,7 @@ public class RestIterator implements Iterator { private void initQueue() { query = baseUrl + "?" + queryParams + querySize + queryFormat; - log.info("REST calls starting with " + query); + log.info("REST calls starting with {}", query); } private void disconnect() { @@ -174,7 +175,7 @@ public class RestIterator implements Iterator { try { query = downloadPage(query); } catch (CollectorException e) { - log.debug("CollectorPlugin.next()-Exception: " + e); + log.debug("CollectorPlugin.next()-Exception: {}", e); throw new RuntimeException(e); } } @@ -198,7 +199,7 @@ public class RestIterator implements Iterator { // check if cursor=* is initial set otherwise add it to the queryParam URL if (resumptionType.equalsIgnoreCase("deep-cursor")) { - log.debug("check resumptionType deep-cursor and check cursor=*?" + query); + log.debug("check resumptionType deep-cursor and check cursor=*?{}", query); if (!query.contains("&cursor=")) { query += "&cursor=*"; } @@ -208,16 +209,16 @@ public class RestIterator implements Iterator { log.info("requestig URL [{}]", query); URL qUrl = new URL(query); - log.debug("authMethod :" + authMethod); + log.debug("authMethod: {}", authMethod); if ("bearer".equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestMethod("GET"); theHttpInputStream = conn.getInputStream(); } else if (BASIC.equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); @@ -237,13 +238,13 @@ public class RestIterator implements Iterator { if (!(emptyXml).equalsIgnoreCase(resultXml)) { resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); - log.debug("nodeList.length: " + nodeList.getLength()); + log.debug("nodeList.length: {}", nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { StringWriter sw = new StringWriter(); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); String toEnqueue = sw.toString(); if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { - log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); + log.warn("The following record 
resulted in empty item for the feeding queue: {}", resultXml); } else { recordQueue.add(sw.toString()); } @@ -274,9 +275,9 @@ public class RestIterator implements Iterator { String[] resumptionKeyValue = arrayUrlArgStr.split("="); if (isInteger(resumptionKeyValue[1])) { urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); - log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); + log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize); } else { - log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); + log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]); } } } @@ -295,7 +296,7 @@ public class RestIterator implements Iterator { discoverResultSize += nodeList.getLength(); } } - log.info("discoverResultSize: {}", discoverResultSize); + log.info("discoverResultSize: {}", discoverResultSize); break; case "pagination": diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c7201a267..ed867c7f2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*; import java.io.IOException; import java.util.Map; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -23,8 +24,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; @@ -67,10 +68,10 @@ public class TransformSparkJobNode { log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); - log.info(String.format("isLookupUrl: %s", isLookupUrl)); + log.info("isLookupUrl: {}", isLookupUrl); final String dateOfTransformation = parser.get("dateOfTransformation"); - log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + log.info("dateOfTransformation: {}", dateOfTransformation); final Integer rpt = Optional .ofNullable(parser.get("recordsPerTask")) @@ -126,12 +127,13 @@ public class TransformSparkJobNode { JavaRDD mdstore = inputMDStore .javaRDD() .repartition(getRepartitionNumber(totalInput, rpt)) - .map((Function) x::call); + .map((Function) x::call) + .filter((Function) Objects::nonNull); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); - log.info("Transformed item " + ct.getProcessedItems().count()); - log.info("Total item " + ct.getTotalItems().count()); - log.info("Transformation Error item " + ct.getErrorItems().count()); + log.info("Transformed item {}", ct.getProcessedItems().count()); + log.info("Total item {}", ct.getTotalItems().count()); + log.info("Transformation Error item {}", ct.getErrorItems().count()); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); writeHdfsFile( diff --git 
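The TransformSparkJobNode change appends filter(Objects::nonNull) to the mapped RDD, so records for which the transformation returned null are dropped before the dataset is saved. A minimal standalone illustration of the same map-then-filter pattern; the local master, sample strings and dummy transformation are placeholders, not the MetadataRecord pipeline:

    import java.util.Arrays;
    import java.util.Objects;

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    public class NonNullFilterSketch {

        public static void main(String[] args) {
            SparkConf conf = new SparkConf().setAppName("NonNullFilterSketch").setMaster("local[*]");
            try (JavaSparkContext sc = new JavaSparkContext(conf)) {
                JavaRDD<String> input = sc.parallelize(Arrays.asList("ok-1", "broken", "ok-2"));

                // the map may yield null for records that cannot be transformed;
                // the trailing filter keeps only the successful ones
                JavaRDD<String> transformed = input
                    .map(s -> s.startsWith("ok") ? s.toUpperCase() : null)
                    .filter(Objects::nonNull);

                System.out.println(transformed.collect());  // [OK-1, OK-2]
            }
        }
    }
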
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 096d0e289..e93f3b518 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') " + + + "where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + + private TransformationFactory() { + } public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -27,7 +33,7 @@ public class TransformationFactory { try { final String transformationPlugin = jobArgument.get("transformationPlugin"); - log.info("Transformation plugin required " + transformationPlugin); + log.info("Transformation plugin required {}", transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { final String transformationRuleId = jobArgument.get("transformationRuleId"); @@ -38,7 +44,7 @@ public class TransformationFactory { final String transformationRule = queryTransformationRuleFromIS( transformationRuleId, isLookupService); - final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); + final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, vocabularies); @@ -46,7 +52,6 @@ public class TransformationFactory { default: throw new DnetTransformationException( "transformation plugin does not exists for " + transformationPlugin); - } } catch (Throwable e) { @@ -55,9 +60,9 @@ public class TransformationFactory { } private static String queryTransformationRuleFromIS(final String transformationRuleId, - final ISLookUpService isLookUpService) throws Exception { + final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException { final String query = String.format(TRULE_XQUERY, transformationRuleId); - System.out.println("asking query to IS: " + query); + log.info("asking query to IS: {}", query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 9da0747e6..3d57b966f 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import java.io.Serializable; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java index e3d588858..1aa549c09 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java @@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable { private static final Set particles = null; - public PersonCleaner() { - - } - private String normalize(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\s+-\\s+", "-"); - -// s = s.replaceAll("[\\W&&[^,-]]", " "); - -// System.out.println("class Person: s: " + s); - -// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replace("\\d", " "); s = s.replace("\\n", " "); @@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { s = s.replaceAll("\\s+", " "); if (s.contains(",")) { - // System.out.println("class Person: s: " + s); - String[] arr = s.split(","); if (arr.length == 1) { @@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } else if (arr.length > 1) { surname = splitTerms(arr[0]); firstname = splitTermsFirstName(arr[1]); -// System.out.println("class Person: surname: " + surname); -// System.out.println("class Person: firstname: " + firstname); - fullname.addAll(surname); fullname.addAll(firstname); } @@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini firstname = fullname.subList(0, lastInitialPosition + 1); - System.out.println("name: " + firstname); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI for (String term : fullname) { @@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } private List splitTerms(String s) { - if (particles == null) { - // particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); - } - List list = Lists.newArrayList(); for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { - // if (!particles.contains(part.toLowerCase())) { list.add(part); - - // } } return list; } @@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { public String getNormalisedFullname() { return isAccurate() ? 
Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : Joiner.on(" ").join(fullname); - // return isAccurate() ? - // Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : - // Joiner.on(" ").join(fullname); } public List getCapitalSurname() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 43291e5de..54192a7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.transformation.xslt; -import java.io.ByteArrayInputStream; import java.io.Serializable; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction, Serializable { - public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform"; - private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; + private static final String DATASOURCE_ID_PARAM = "varDataSourceId"; - private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; + private static final String DATASOURCE_NAME_PARAM = "varOfficialName"; private final AggregationCounter aggregationCounter; @@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index e4f2715fb..bd864a6aa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -58,7 +58,7 @@ --hdfsNameNode${nameNode} --fileURL${projectFileURL} --hdfsPath${workingDir}/projects - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject @@ -70,7 +70,7 @@ --hdfsNameNode${nameNode} --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme @@ -83,7 +83,7 @@ --fileURL${topicFileURL} --hdfsPath${workingDir}/topic --sheetName${sheetName} - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index b6c9c94b9..9ccb70a9f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -28,6 +28,11 @@ "paramLongName" : "sheetName", "paramDescription" : "the name of the sheet in case 
the file is excel", "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index 7200d2896..f2158748b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -67,7 +69,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOne() throws Exception { + void matchOne() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOneWithTwo() throws Exception { + void matchOneWithTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest { List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); String tmp_influence = tmp_ds.get(0).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); tmp_influence = tmp_ds.get(1).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); - Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); + assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); } @Test - public void matchTwo() throws Exception { + void matchTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), 
AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 2); + assertEquals(2, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java deleted file mode 100644 index da5beecf9..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ /dev/null @@ -1,31 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -public class CSVParserTest { - - @Test - public void readProgrammeTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme"); - - Assertions.assertEquals(24, pl.size()); - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java new file mode 100644 index 000000000..21fb63273 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java @@ -0,0 +1,148 @@ + +package eu.dnetlib.dhp.actionmanager.project; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + CSVProgramme.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = 
in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + assertTrue(csvp.getShortTitle().equals("")); + assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + assertTrue(csvp.getLanguage().equals("en")); + } + assertTrue(csvp.getCode() != null); + assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/projects", + CSVProject.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + assertTrue(csvp.getId().equals("771736")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + assertTrue(csvp.getId().equals("752903")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + assertTrue(csvp.getId().equals("861952")); + assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + assertTrue(csvp.getId() != null); + assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index b7155bc3a..d27a732ea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -13,24 +13,24 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import 
eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; @Disabled public class EXCELParserTest { private static Path workingDir; - private HttpConnector2 httpConnector = new HttpConnector2(); + private final HttpConnector2 httpConnector = new HttpConnector2(); private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + workingDir = Files.createTempDirectory(EXCELParserTest.class.getSimpleName()); } @Test - public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, + void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, IllegalAccessException, InstantiationException { EXCELParser excelParser = new EXCELParser(); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 256dc0521..680872126 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; public class PrepareH2020ProgrammeTest { @@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { PrepareProgramme .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 0db3485f5..d0c50a054 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -21,7 +22,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; public class PrepareProjectTest { @@ -66,7 +67,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProjectTest() throws Exception { + void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { diff --git 
a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 42e494681..58365e026 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -69,7 +69,7 @@ public class SparkUpdateProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { SparkAtomicActionJob .main( new String[] { @@ -78,7 +78,7 @@ public class SparkUpdateProjectTest { "-programmePath", getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz") + "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json") .getPath(), "-projectPath", getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(), diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index f16901cb4..aa11f4ab5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror; import java.io.FileInputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); final Organization org = GenerateRorActionSetJob.convertRorOrg(r); - System.out.println(mapper.writeValueAsString(org)); + final String s = mapper.writeValueAsString(org); + Assertions.assertTrue(StringUtils.isNotBlank(s)); + System.out.println(s); } @Test @@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - GenerateRorActionSetJob.convertRorOrg(r); + Organization o = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertNotNull(o); + Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java index 05dd6b1d3..1a10b5f64 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java @@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index afb6ae6a1..633a47379 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + void testGenerateNativeStoreSparkJobRefresh() throws Exception { MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); @@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + void testGenerateNativeStoreSparkJobIncremental() throws Exception { MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); @@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(3) - public void testTransformSparkJob() throws Exception { + void testTransformSparkJob() throws Exception { setUpVocabulary(); @@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testJSONSerialization() throws Exception { + void testJSONSerialization() throws Exception { final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); System.out.println("s = " + s); final ObjectMapper mapper = new ObjectMapper(); @@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testGenerationMetadataRecord() throws Exception { + void testGenerationMetadataRecord() throws Exception { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); @@ -236,7 +236,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final MetadataRecord record = GenerateNativeStoreSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index efe925175..f708c367b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -12,16 +12,16 @@ import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * @author js, Andreas Czerniak * */ -public class 
RestCollectorPluginTest { +class RestCollectorPluginTest { private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); @@ -65,7 +65,7 @@ public class RestCollectorPluginTest { @Disabled @Test - public void test() throws CollectorException { + void test() throws CollectorException { AtomicInteger i = new AtomicInteger(0); final Stream stream = rcp.collect(api, new AggregatorReport()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java index 9f75bd468..906f69dc9 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java @@ -9,7 +9,7 @@ import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpClientParams; /** * diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java index b5ea5f069..f52f4632a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.collector.worker; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.collection.ApiDescriptor; @Disabled -public class CollectorWorkerApplicationTests { +class CollectorWorkerApplicationTests { @Test - public void testCollectionOAI() throws Exception { + void testCollectionOAI() throws Exception { final ApiDescriptor api = new ApiDescriptor(); api.setId("oai"); api.setProtocol("oai"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 948a8f93b..7bd7baaea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) -public class TransformationJobTest extends AbstractVocabularyTest { +class TransformationJobTest extends AbstractVocabularyTest { private SparkConf sparkConf; @@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Date cleaner") - public void testDateCleaner() throws Exception { + void testDateCleaner() throws Exception { DateCleaner dc = new DateCleaner(); assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("2002-09-20", dc.clean("20-09-2002")); @@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test 
@DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") - public void testTransformSaxonHE() throws Exception { + void testTransformSaxonHE() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") - public void testTransformITGv4Zenodo() throws Exception { + void testTransformITGv4Zenodo() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -97,7 +97,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") - public void testTransformMostlyUsedScript() throws Exception { + void testTransformMostlyUsedScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; @@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") - public void testTransformRestScript() throws Exception { + void testTransformRestScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; @@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") - public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { + void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { @@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main") - public void transformTest(@TempDir Path testDir) throws Exception { + void transformTest(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json new file mode 100644 index 000000000..e8e04e8db --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json @@ -0,0 +1,277 @@ +{"code":"H2020-EU.5.g.","title":"Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","classification_short":"Science with and for Society | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts"} +{"code":"H2020-EU.3.4.2.1.","title":"A substantial reduction of traffic congestion","shortTitle":"","language":"en","classification":"Societal 
challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | A substantial reduction of traffic congestion","classification_short":"Societal Challenges | Transport | Mobility, safety and security | A substantial reduction of traffic congestion"} +{"code":"H2020-EU.3.4.5.4.","title":"ITD Airframe","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Airframe","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Airframe"} +{"code":"H2020-EU.3.3.8.1.","title":"Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs"} +{"code":"H2020-EU.3.7.1.","title":"Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","classification_short":"Societal Challenges | Secure societies | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs"} +{"code":"H2020-EU.3.4.1.1.","title":"Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration"} +{"code":"H2020-EU.1.4.3.","title":"Reinforcing European research infrastructure policy and international cooperation","shortTitle":"Research infrastructure policy and international cooperation","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation"} +{"code":"H2020-EU.1.4.","title":"EXCELLENT SCIENCE - Research Infrastructures","shortTitle":"Research Infrastructures","language":"en","classification":"Excellent science | Research Infrastructures","classification_short":"Excellent Science | Research Infrastructures"} +{"code":"H2020-EU.3.4.6.1.","title":"Reduce 
the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies"} +{"code":"H2020-EU.3.4.5.5.","title":"ITD Engines","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Engines","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Engines"} +{"code":"H2020-EU.2.1.1.7.3.","title":"Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking"} +{"code":"H2020-EU.3.1.6.1.","title":"Promoting integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Promoting integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Promoting integrated care"} +{"code":"H2020-EU.3.7.6.","title":"Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","classification_short":"Societal Challenges | Secure societies | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding 
of all areas of security, risk and management"} +{"code":"H2020-EU.3.4.2.3.","title":"Developing new concepts of freight transport and logistics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Developing new concepts of freight transport and logistics","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Developing new concepts of freight transport and logistics"} +{"code":"H2020-EU.3.3.2.1.","title":"Develop the full potential of wind energy","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy"} +{"code":"H2020-EU.3.2.5.","title":"Cross-cutting marine and maritime research","shortTitle":"Cross-cutting marine and maritime research","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research"} +{"code":"H2020-EU.3.4.7.","title":"SESAR JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU","classification_short":"Societal Challenges | Transport | SESAR JU"} +{"code":"H2020-EU.2.1.3.3.","title":"Management of materials components","shortTitle":"Management of materials components","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Management of materials components","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Management of materials components"} +{"code":"H2020-EU.3.3.3.","title":"Alternative fuels and mobile energy sources","shortTitle":"Alternative fuels and mobile energy sources","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources"} +{"code":"H2020-EU.7.","title":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","shortTitle":"European Institute of Innovation and Technology (EIT)","language":"en","classification":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","classification_short":"European Institute of Innovation and Technology (EIT)"} +{"code":"H2020-EU.3.5.4.1.","title":"Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers 
in this context and boost their market uptake","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake"} +{"code":"H2020-EU.3.1.4.","title":"Active ageing and self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health"} +{"code":"H2020-EU.1.","title":"Excellent science","shortTitle":"Excellent Science","language":"en","classification":"Excellent science","classification_short":"Excellent Science"} +{"code":"H2020-EU.3.5.6.1.","title":"Identifying resilience levels via observations, monitoring and modelling","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling"} +{"code":"H2020-EU.3.2.4.3.","title":"Supporting market development for bio-based products and processes","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Supporting market development for bio-based products and processes","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Supporting market development for bio-based products and processes"} +{"code":"H2020-EU.2.1.6.1.","title":"Enabling European competitiveness, non-dependence and innovation of the European space sector","shortTitle":"Competitiveness, non-dependence and innovation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation"} +{"code":"H2020-EU.4.b.","title":"Twinning of research institutions","shortTitle":"Twinning of research institutions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Twinning of research institutions","classification_short":"Spreading excellence and widening participation | Twinning of research institutions"} +{"code":"H2020-EU.3.1.7.6.","title":"Psychiatric diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases"} +{"code":"H2020-EU.3.1.2.2.","title":"Improving diagnosis and 
prognosis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Improving diagnosis and prognosis","classification_short":"Societal Challenges | Health | Preventing disease | Improving diagnosis and prognosis"} +{"code":"H2020-EU.3.4.5.3.","title":"IADP Fast Rotorcraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Fast Rotorcraft"} +{"code":"H2020-EU.3.1.3.1.","title":"Treating disease, including developing regenerative medicine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine","classification_short":"Societal Challenges | Health | Treating and managing disease | Treating disease, including developing regenerative medicine"} +{"code":"H2020-EU.3.4.3.3.","title":"Advanced production processes","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Advanced production processes","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Advanced production processes"} +{"code":"H2020-EU.3.1.7.","title":"Innovative Medicines Initiative 2 (IMI2)","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2)","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2)"} +{"code":"H2020-EU.3.6.3.2.","title":"Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity"} +{"code":"H2020-EU.3.5.1.2.","title":"Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures"} +{"code":"H2020-EU.3.6.1.","title":"Inclusive societies","shortTitle":"Inclusive societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, 
Innovative And Reflective Societies | Inclusive societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies"} +{"code":"H2020-EU.3.2.","title":"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","shortTitle":"Food, agriculture, forestry, marine research and bioeconomy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy"} +{"code":"H2020-EU.2.1.6.1.2.","title":"Boost innovation between space and non-space sectors","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Boost innovation between space and non-space sectors","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Boost innovation between space and non-space sectors"} +{"code":"H2020-EU.2.1.3.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced materials","shortTitle":"Advanced materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials"} +{"code":"H2020-EU.2.1.2.3.","title":"Developing the societal dimension of nanotechnology","shortTitle":"Societal dimension of nanotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing the societal dimension of nanotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Societal dimension of nanotechnology"} +{"code":"H2020-EU.4.","title":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","shortTitle":"Spreading excellence and widening participation","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","classification_short":"Spreading excellence and widening participation"} +{"code":"H2020-EU.3.6.1.2.","title":"Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and 
demographic change"} +{"code":"H2020-EU.3.4.2.","title":"Better mobility, less congestion, more safety and security","shortTitle":"Mobility, safety and security","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security","classification_short":"Societal Challenges | Transport | Mobility, safety and security"} +{"code":"H2020-EU.3.1.7.13.","title":"Other","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Other","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Other"} +{"code":"H2020-EU.3.3.3.3.","title":"New alternative fuels","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | New alternative fuels","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | New alternative fuels"} +{"code":"H2020-EU.2.1.3.5.","title":"Materials for creative industries, including heritage","shortTitle":"Materials for creative industries, including heritage","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for creative industries, including heritage","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for creative industries, including heritage"} +{"code":"H2020-EU.3.3.3.2.","title":"Reducing time to market for hydrogen and fuel cells technologies","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies"} +{"code":"H2020-EU.5.d.","title":"Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","classification_short":"Science with and for Society | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels"} +{"code":"H2020-EU.3.1.","title":"SOCIETAL CHALLENGES - Health, demographic change and well-being","shortTitle":"Health","language":"en","classification":"Societal challenges | Health, demographic change and well-being","classification_short":"Societal Challenges | Health"} +{"code":"H2020-EU.3.5.3.1.","title":"Improve the knowledge base on the availability of raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the 
availability of raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials"} +{"code":"H2020-EU.3.2.1.4.","title":"Sustainable forestry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Sustainable forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Sustainable forestry"} +{"code":"H2020-EU.3.3.","title":"SOCIETAL CHALLENGES - Secure, clean and efficient energy","shortTitle":"Energy","language":"en","classification":"Societal challenges | Secure, clean and efficient energy","classification_short":"Societal Challenges | Energy"} +{"code":"H2020-EU.3.4.8.1.","title":"Innovation Programme 1 (IP1): Cost-efficient and reliable trains","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains"} +{"code":"H2020-EU.2.3.2.1.","title":"Support for research intensive SMEs","shortTitle":"Support for research intensive SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Support for research intensive SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Support for research intensive SMEs"} +{"code":"H2020-EU.2.1.3.2.","title":"Materials development and transformation","shortTitle":"Materials development and transformation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials development and transformation","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials development and transformation"} +{"code":"H2020-EU.1.4.1.3.","title":"Development, deployment and operation of ICT-based e-infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures"} +{"code":"H2020-EU.3.5.4.2.","title":"Support innovative policies and societal changes","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Support innovative policies and societal changes","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Support innovative policies and societal changes"} +{"code":"H2020-EU.2.1.3.6.","title":"Metrology, characterisation, standardisation and quality control","shortTitle":"Metrology, characterisation, standardisation and quality 
control","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Metrology, characterisation, standardisation and quality control","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Metrology, characterisation, standardisation and quality control"} +{"code":"H2020-EU.3.4.5.8.","title":"ECO Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ECO Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ECO Transverse Area"} +{"code":"H2020-EU.5.f.","title":"Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","classification_short":"Science with and for Society | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation"} +{"code":"H2020-EU.5.h.","title":"Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","classification_short":"Science with and for Society | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public"} +{"code":"H2020-EU.2.1.1.7.1.","title":"Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration"} +{"code":"H2020-EU.3.7.5.","title":"Increase Europe's resilience to crises and disasters","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Increase Europe's resilience to crises and disasters","classification_short":"Societal Challenges | Secure societies | 
Increase Europe's resilience to crises and disasters"} +{"code":"H2020-EU.1.4.2.2.","title":"Strengthening the human capital of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Strengthening the human capital of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Strengthening the human capital of research infrastructures"} +{"code":"H2020-EU.3.4.1.2.","title":"Developing smart equipment, infrastructures and services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services"} +{"code":"H2020-EU.2.3.2.2.","title":"Enhancing the innovation capacity of SMEs","shortTitle":"Enhancing the innovation capacity of SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Enhancing the innovation capacity of SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Enhancing the innovation capacity of SMEs"} +{"code":"H2020-EU.1.3.5.","title":"Specific support and policy actions","shortTitle":"MSCA Specific support","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Specific support and policy actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Specific support"} +{"code":"H2020-EU.3.2.3.3.","title":"Boosting marine and maritime innovation through biotechnology","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology"} +{"code":"H2020-EU.3.2.1.2.","title":"Providing ecosystems services and public goods","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods"} +{"code":"H2020-EU.2.3.2.3.","title":"Supporting market-driven innovation","shortTitle":"Supporting market-driven innovation","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Supporting market-driven innovation","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Supporting market-driven innovation"} +{"code":"H2020-EU.5.a.","title":"Make scientific and technological careers attractive to young students, and forster sustainable interaction 
between schools, research institutions, industry and civil society organisations","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","classification_short":"Science with and for Society | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations"} +{"code":"H2020-EU.3.1.7.9.","title":"Ageing-associated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases"} +{"code":"H2020-EU.2.2.1.","title":"The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","shortTitle":"Debt facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Debt facility"} +{"code":"H2020-Euratom-1.8.","title":"Ensure availability and use of research infrastructures of pan_european relevance","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance","classification_short":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance"} +{"code":"H2020-EU.3.2.2.1.","title":"Informed consumer choices","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices"} +{"code":"H2020-EU.3.7.","title":"Secure societies - Protecting freedom and security of Europe and its citizens","shortTitle":"Secure societies","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens","classification_short":"Societal Challenges | Secure societies"} +{"code":"H2020-EU.1.3.4.","title":"Increasing structural impact by co-funding activities","shortTitle":"MSCA Co-funding","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Increasing structural impact by co-funding activities","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Co-funding"} +{"code":"H2020-EU.2.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies","shortTitle":"Leadership in enabling and industrial technologies (LEIT)","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT)"} 
+{"code":"H2020-EU.2.1.3.4.","title":"Materials for a sustainable, resource-efficient and low-emission industry","shortTitle":"Materials for a resource-efficient and low-emission industry","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for a sustainable, resource-efficient and low-emission industry","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for a resource-efficient and low-emission industry"} +{"code":"H2020-EU.3.4.5.7.","title":"Small Air Transport (SAT) Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area"} +{"code":"H2020-EU.3.4.8.3.","title":"Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure"} +{"code":"H2020-Euratom-1.1.","title":"Support safe operation of nuclear systems","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support safe operation of nuclear systems","classification_short":"Euratom | Indirect actions | Support safe operation of nuclear systems"} +{"code":"H2020-EU.2.3.1.","title":" Mainstreaming SME support, especially through a dedicated instrument","shortTitle":"Mainstreaming SME support","language":"en","classification":"Industrial leadership | Innovation In SMEs | Mainstreaming SME support, especially through a dedicated instrument","classification_short":"Industrial Leadership | Innovation in SMEs | Mainstreaming SME support"} +{"code":"H2020-EU.1.4.3.1.","title":"Reinforcing European policy for research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures"} +{"code":"H2020-Euratom-1.3.","title":"Support the development and sustainability of nuclear competences at Union level","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level","classification_short":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level"} +{"code":"H2020-EU.3.1.7.1.","title":"Antimicrobial resistance","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance"} +{"code":"H2020-EU.3.7.4.","title":"Improve cyber 
security","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Improve cyber security","classification_short":"Societal Challenges | Secure societies | Improve cyber security"} +{"code":"H2020-EU.2.1.1.7.2.","title":"Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services"} +{"code":"H2020-EU.3.5.4.","title":"Enabling the transition towards a green economy and society through eco-innovation","shortTitle":"A green economy and society through eco-innovation","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation"} +{"code":"H2020-EU.3.5.3.2.","title":"Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery"} +{"code":"H2020-EU.3.4.5.10.","title":"Thematic Topics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Thematic Topics","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Thematic Topics"} +{"code":"H2020-EU.3.1.5.1.","title":"Improving halth information and better use of health data","shortTitle":"","language":"en","classification":"Societal 
challenges | Health, demographic change and well-being | Methods and data | Improving halth information and better use of health data","classification_short":"Societal Challenges | Health | Methods and data | Improving halth information and better use of health data"} +{"code":"H2020-EU.3.3.3.1.","title":"Make bio-energy more competitive and sustainable","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable"} +{"code":"H2020-EU.3.6.2.1.","title":"Strengthen the evidence base and support for the Innovation Union and ERA","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA"} +{"code":"H2020-EU.3.1.7.12.","title":"Vaccine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Vaccine","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Vaccine"} +{"code":"H2020-EU.3.5.4.3.","title":"Measure and assess progress towards a green economy","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Measure and assess progress towards a green economy","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Measure and assess progress towards a green economy"} +{"code":"H2020-EU.3.4.8.5.","title":"Innovation Programme 5: Technologies for sustainable and attractive European rail freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight"} +{"code":"H2020-EU.3.5.4.4.","title":"Foster resource efficiency through digital systems","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Foster resource efficiency through digital systems","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Foster resource efficiency through digital systems"} +{"code":"H2020-EU.3.3.8.3.","title":"Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy 
sources","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources"} +{"code":"H2020-Euratom","title":"Euratom","shortTitle":"","language":"en","classification":"Euratom","classification_short":"Euratom"} +{"code":"H2020-EU.3.5.6.2.","title":"Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards"} +{"code":"H2020-EU.3.2.5.2.","title":"Develop the potential of marine resources through an integrated approach","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach"} +{"code":"H2020-EU.2.1.1.5.","title":"Advanced interfaces and robots: Robotics and smart spaces","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Advanced interfaces and robots: Robotics and smart spaces","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Advanced interfaces and robots: Robotics and smart spaces"} +{"code":"H2020-EU.3.3.5.","title":"New knowledge and technologies","shortTitle":"New knowledge and technologies","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | New knowledge and technologies","classification_short":"Societal Challenges | Energy | New knowledge and technologies"} +{"code":"H2020-EU.1.2.2.","title":"FET Proactive","shortTitle":"FET Proactive","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Proactive","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Proactive"} +{"code":"H2020-EU.3.6.1.3.","title":"Europe's role as a global actor, notably regarding human rights and global 
justice","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice"} +{"code":"H2020-EU.2.1.4.1.","title":"Boosting cutting-edge biotechnologies as a future innovation driver","shortTitle":"Cutting-edge biotechnologies as future innovation driver","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Boosting cutting-edge biotechnologies as a future innovation driver","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Cutting-edge biotechnologies as future innovation driver"} +{"code":"H2020-EU.3.1.3.","title":"Treating and managing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease","classification_short":"Societal Challenges | Health | Treating and managing disease"} +{"code":"H2020-EU.3.3.4.","title":"A single, smart European electricity grid","shortTitle":"A single, smart European electricity grid","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | A single, smart European electricity grid","classification_short":"Societal Challenges | Energy | A single, smart European electricity grid"} +{"code":"H2020-EU.3.2.6.","title":"Bio-based Industries Joint Technology Initiative (BBI-JTI)","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)"} +{"code":"H2020-EU.1.3.2.","title":"Nurturing excellence by means of cross-border and cross-sector mobility","shortTitle":"MSCA Mobility","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Nurturing excellence by means of cross-border and cross-sector mobility","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Mobility"} +{"code":"H2020-EU.2.1.3.7.","title":"Optimisation of the use of materials","shortTitle":"Optimisation of the use of materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Optimisation of the use of materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Optimisation of the use of materials"} +{"code":"H2020-EU.2.1.2.4.","title":"Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","shortTitle":"Synthesis and manufacturing of nanomaterials, components and systems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","classification_short":"Industrial 
Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Synthesis and manufacturing of nanomaterials, components and systems"} +{"code":"H2020-EU.1.4.1.","title":"Developing the European research infrastructures for 2020 and beyond","shortTitle":"Research infrastructures for 2020 and beyond","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond"} +{"code":"H2020-EU.3.1.1.1.","title":"Understanding the determinants of health, improving health promotion and disease prevention","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention"} +{"code":"H2020-EU.5.c.","title":"Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","classification_short":"Science with and for Society | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology"} +{"code":"H2020-EU.5.","title":"SCIENCE WITH AND FOR SOCIETY","shortTitle":"Science with and for Society","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY","classification_short":"Science with and for Society"} +{"code":"H2020-EU.3.5.3.3.","title":"Find alternatives for critical raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials"} +{"code":"H2020-EU.3.2.3.1.","title":"Developing sustainable and environmentally-friendly fisheries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing sustainable and environmentally-friendly 
fisheries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries"} +{"code":"H2020-EU.2.1.2.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Nanotechnologies","shortTitle":"Nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies"} +{"code":"H2020-EU.3.4.3.2.","title":"On board, smart control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | On board, smart control systems","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | On board, smart control systems"} +{"code":"H2020-EU.3.2.4.1.","title":"Fostering the bio-economy for bio-based industries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Fostering the bio-economy for bio-based industries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Fostering the bio-economy for bio-based industries"} +{"code":"H2020-EU.3.1.6.2.","title":"Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches"} +{"code":"H2020-EU.2.1.5.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced manufacturing and processing","shortTitle":"Advanced manufacturing and processing","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing"} +{"code":"H2020-EU.3.5.2.2.","title":"Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, 
biodiversity and ecosystems | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services"} +{"code":"H2020-EU.3.1.7.3.","title":"Cardiovascular diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases"} +{"code":"H2020-EU.3.3.8.2.","title":"Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market"} +{"code":"H2020-EU.2.1.6.3.","title":"Enabling exploitation of space data","shortTitle":"Enabling exploitation of space data","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling exploitation of space data","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling exploitation of space data"} +{"code":"H2020-EU.2.1.2.5.","title":"Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","shortTitle":"Capacity-enhancing techniques, measuring methods and equipment","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Capacity-enhancing techniques, measuring methods and equipment"} +{"code":"H2020-EU.3.6.2.","title":"Innovative societies","shortTitle":"Innovative societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies"} 
+{"code":"H2020-EU.3.1.2.1.","title":"Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","classification_short":"Societal Challenges | Health | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility"} +{"code":"H2020-EU.3.6.1.4.","title":"The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design"} +{"code":"H2020-EU.3.3.2.4.","title":"Develop geothermal, hydro, marine and other renewable energy options","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options"} +{"code":"H2020-EU.5.b.","title":"Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","classification_short":"Science with and for Society | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities"} +{"code":"H2020-EU.1.3.3.","title":"Stimulating innovation by means of cross-fertilisation of knowledge","shortTitle":"MSCA Knowledge","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Stimulating innovation by means of cross-fertilisation of knowledge","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Knowledge"} +{"code":"H2020-EU.3.1.4.2.","title":"Individual awareness and empowerment for self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health"} +{"code":"H2020-EU.3.1.7.8.","title":"Immune-mediated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | 
Immune-mediated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases"} +{"code":"H2020-EU.3.4.","title":"SOCIETAL CHALLENGES - Smart, Green And Integrated Transport","shortTitle":"Transport","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport","classification_short":"Societal Challenges | Transport"} +{"code":"H2020-EU.3.2.6.1.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy"} +{"code":"H2020-EU.2.1.2.1.","title":"Developing next generation nanomaterials, nanodevices and nanosystems ","shortTitle":"Next generation nanomaterials, nanodevices and nanosystems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing next generation nanomaterials, nanodevices and nanosystems ","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Next generation nanomaterials, nanodevices and nanosystems"} +{"code":"H2020-Euratom-1.5.","title":"Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","classification_short":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities"} +{"code":"H2020-EU.3.5.","title":"SOCIETAL CHALLENGES - Climate action, Environment, Resource Efficiency and Raw Materials","shortTitle":"Climate and environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials","classification_short":"Societal Challenges | Climate and environment"} +{"code":"H2020-EU.2.1.1.6.","title":"Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies"} 
+{"code":"H2020-EU.3.4.2.4.","title":"Reducing accident rates, fatalities and casualties and improving security","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Reducing accident rates, fatalities and casualties and improving security","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Reducing accident rates, fatalities and casualties and improving security"} +{"code":"H2020-EU.3.6.2.2.","title":"Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail"} +{"code":"H2020-EU.3.5.1.1.","title":"Improve the understanding of climate change and the provision of reliable climate projections","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections"} +{"code":"H2020-EU.3.4.3.4.","title":"Exploring entirely new transport concepts","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts"} +{"code":"H2020-EU.3.5.2.1.","title":"Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being"} +{"code":"H2020-EU.3.2.2.3.","title":"A sustainable and competitive agri-food industry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, 
sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry"} +{"code":"H2020-EU.1.4.1.1.","title":"Developing new world-class research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Developing new world-class research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Developing new world-class research infrastructures"} +{"code":"H2020-EU.3.1.2.3.","title":"Developing better preventive and therapeutic vaccines","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing better preventive and therapeutic vaccines","classification_short":"Societal Challenges | Health | Preventing disease | Developing better preventive and therapeutic vaccines"} +{"code":"H2020-EU.1.4.3.2.","title":"Facilitate strategic international cooperation","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Facilitate strategic international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Facilitate strategic international cooperation"} +{"code":"H2020-EU.3.5.2.","title":"Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","shortTitle":"Protection of the environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","classification_short":"Societal Challenges | Climate and environment | Protection of the environment"} +{"code":"H2020-Euratom-1.9.","title":"European Fusion Development Agreement","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | European Fusion Development Agreement","classification_short":"Euratom | Indirect actions | European Fusion Development Agreement"} +{"code":"H2020-EU.3.2.1.1.","title":"Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience"} +{"code":"H2020-EU.3.2.2.2.","title":"Healthy and safe foods and diets for 
all","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all"} +{"code":"H2020-EU.2.1.4.2.","title":"Bio-technology based industrial products and processes","shortTitle":"Bio-technology based industrial products and processes","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Bio-technology based industrial products and processes","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Bio-technology based industrial products and processes"} +{"code":"H2020-EU.3.4.5.1.","title":"IADP Large Passenger Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Large Passenger Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Large Passenger Aircraft"} +{"code":"H2020-EU.3.1.1.3.","title":"Improving surveillance and preparedness","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Improving surveillance and preparedness","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Improving surveillance and preparedness"} +{"code":"H2020-EU.2.1.6.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Space","shortTitle":"Space","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space"} +{"code":"H2020-EU.3.1.5.2.","title":"Improving scientific tools and methods to support policy making and regulatory needs","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs","classification_short":"Societal Challenges | Health | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs"} +{"code":"H2020-EU.3.","title":"Societal challenges","shortTitle":"Societal Challenges","language":"en","classification":"Societal challenges","classification_short":"Societal Challenges"} +{"code":"H2020-EU.1.3.","title":"EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions","shortTitle":"Marie-Sklodowska-Curie Actions","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions"} +{"code":"H2020-EU.4.f.","title":"Strengthening the administrative and operational capacity of transnational networks of National Contact Points","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Strengthening the administrative and operational capacity of transnational networks of National Contact 
Points","classification_short":"Spreading excellence and widening participation | Strengthening the administrative and operational capacity of transnational networks of National Contact Points"} +{"code":"H2020-EU.1.2.","title":"EXCELLENT SCIENCE - Future and Emerging Technologies (FET)","shortTitle":"Future and Emerging Technologies (FET)","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET)","classification_short":"Excellent Science | Future and Emerging Technologies (FET)"} +{"code":"H2020-EU.3.3.1.1.","title":"Bring to mass market technologies and services for a smart and efficient energy use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Bring to mass market technologies and services for a smart and efficient energy use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Bring to mass market technologies and services for a smart and efficient energy use"} +{"code":"H2020-EU.3.3.2.2.","title":"Develop efficient, reliable and cost-competitive solar energy systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems"} +{"code":"H2020-EU.4.c.","title":"Establishing ‚ERA Chairs’","shortTitle":"ERA chairs","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Establishing ‚ERA Chairs’","classification_short":"Spreading excellence and widening participation | ERA chairs"} +{"code":"H2020-EU.3.4.5","title":"CLEANSKY2","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2","classification_short":"Societal Challenges | Transport | CLEANSKY2"} +{"code":"H2020-EU.3.4.5.2.","title":"IADP Regional Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Regional Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Regional Aircraft"} +{"code":"H2020-EU.3.5.1.","title":"Fighting and adapting to climate change","shortTitle":"Fighting and adapting to climate change","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change"} +{"code":"H2020-EU.3.3.1.","title":"Reducing energy consumption and carbon foorpint by smart and sustainable use","shortTitle":"Reducing energy consumption and carbon footprint","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint"} +{"code":"H2020-EU.3.4.1.","title":"Resource efficient transport that respects the environment","shortTitle":"Resource efficient transport that respects the environment","language":"en","classification":"Societal challenges | Smart, Green And Integrated 
Transport | Resource efficient transport that respects the environment","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment"} +{"code":"H2020-EU.3.2.6.2.","title":"Fostering the bio-economy for bio-based industrie","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie"} +{"code":"H2020-EU.3.4.7.1","title":"Exploratory Research","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU | Exploratory Research","classification_short":"Societal Challenges | Transport | SESAR JU | Exploratory Research"} +{"code":"H2020-EU.1.2.1.","title":"FET Open","shortTitle":"FET Open","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Open","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Open"} +{"code":"H2020-EU.3.4.3.1.","title":"Developing the next generation of transport means as the way to secure market share in the future","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future"} +{"code":"H2020-EU.3.2.4.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","shortTitle":"Bio-based industries and supporting bio-economy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy"} +{"code":"H2020-EC","title":"Horizon 2020 Framework Programme","shortTitle":"EC Treaty","language":"en","classification":"Horizon 2020 Framework Programme","classification_short":"EC Treaty"} +{"code":"H2020-EU.3.6.2.4.","title":"Promote coherent and effective cooperation with third countries","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Promote coherent and effective cooperation with third countries","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Promote coherent and effective cooperation with third countries"} +{"code":"H2020-EU.3.1.7.5.","title":"Neurodegenerative diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines 
Initiative 2 (IMI2) | Neurodegenerative diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases"} +{"code":"H2020-EU.2.1.6.4.","title":"Enabling European research in support of international space partnerships","shortTitle":"Research in support of international space partnerships","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European research in support of international space partnerships","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Research in support of international space partnerships"} +{"code":"H2020-EU.2.1.5.1.","title":"Technologies for Factories of the Future","shortTitle":"Technologies for Factories of the Future","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies for Factories of the Future","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies for Factories of the Future"} +{"code":"H2020-EU.2.3.2.","title":"Specific support","shortTitle":"","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support"} +{"code":"H2020-EU.1.4.2.","title":"Fostering the innovation potential of research infrastructures and their human resources","shortTitle":"Research infrastructures and their human resources","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources"} +{"code":"H2020-EU.3.3.1.2.","title":"Unlock the potential of efficient and renewable heating-cooling systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Unlock the potential of efficient and renewable heating-cooling systems","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Unlock the potential of efficient and renewable heating-cooling systems"} +{"code":"H2020-EU.3.2.3.2.","title":"Developing competitive and environmentally-friendly European aquaculture","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture"} +{"code":"H2020-EU.3.2.1.3.","title":"Empowerment of rural areas, support to policies and rural innovation","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Empowerment of rural 
areas, support to policies and rural innovation","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation"} +{"code":"H2020-EU.3.2.5.3.","title":"Cross-cutting concepts and technologies enabling maritime growth","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth"} +{"code":"H2020-EU.2.1.3.1.","title":"Cross-cutting and enabling materials technologies","shortTitle":"Cross-cutting and enabling materials technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Cross-cutting and enabling materials technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Cross-cutting and enabling materials technologies"} +{"code":"H2020-EU.3.1.1.2.","title":"Understanding disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding disease"} +{"code":"H2020-Euratom-1.6.","title":"Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","classification_short":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design"} +{"code":"H2020-EU.3.5.7.1.","title":"Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements"} +{"code":"H2020-EU.2.2.","title":"INDUSTRIAL LEADERSHIP - Access to risk finance","shortTitle":"Access to risk finance","language":"en","classification":"Industrial leadership | Access to risk finance","classification_short":"Industrial Leadership | Access to risk finance"} +{"code":"H2020-EU.3.4.6.","title":"FCH2 (transport 
objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives)","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives)"} +{"code":"H2020-EU.4.d.","title":"A Policy Support Facility","shortTitle":"Policy Support Facility (PSF)","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | A Policy Support Facility","classification_short":"Spreading excellence and widening participation | Policy Support Facility (PSF)"} +{"code":"H2020-EU.2.1.1.7.","title":"ECSEL","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL"} +{"code":"H2020-EU.3.1.5.","title":"Methods and data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data","classification_short":"Societal Challenges | Health | Methods and data"} +{"code":"H2020-EU.3.7.7.","title":"Enhance stadardisation and interoperability of systems, including for emergency purposes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Enhance stadardisation and interoperability of systems, including for emergency purposes","classification_short":"Societal Challenges | Secure societies | Enhance stadardisation and interoperability of systems, including for emergency purposes"} +{"code":"H2020-Euratom-1.7.","title":"Promote innovation and industry competitiveness","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Promote innovation and industry competitiveness","classification_short":"Euratom | Indirect actions | Promote innovation and industry competitiveness"} +{"code":"H2020-EU.2.1.5.3.","title":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","shortTitle":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries"} +{"code":"H2020-EU.2.1.4.3.","title":"Innovative and competitive platform technologies","shortTitle":"Innovative and competitive platform technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Innovative and competitive platform technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Innovative and competitive platform technologies"} +{"code":"H2020-EU.1.2.3.","title":"FET Flagships","shortTitle":"FET Flagships","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Flagships","classification_short":"Excellent Science | 
Future and Emerging Technologies (FET) | FET Flagships"} +{"code":"H2020-EU.3.6.3.","title":"Reflective societies - cultural heritage and European identity","shortTitle":"Reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies"} +{"code":"H2020-EU.3.6.3.3.","title":"Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures"} +{"code":"H2020-EU.3.2.4.2.","title":"Developing integrated biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Developing integrated biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Developing integrated biorefineries"} +{"code":"H2020-EU.2.1.6.1.1.","title":"Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems"} +{"code":"H2020-EU.3.1.3.2.","title":"Transferring knowledge to clinical practice and scalable innovation actions","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions","classification_short":"Societal Challenges | Health | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions"} +{"code":"H2020-EU.2.","title":"Industrial leadership","shortTitle":"Industrial Leadership","language":"en","classification":"Industrial 
leadership","classification_short":"Industrial Leadership"} +{"code":"H2020-EU.3.4.1.3.","title":"Improving transport and mobility in urban areas","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas"} +{"code":"H2020-EU.4.e.","title":"Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","classification_short":"Spreading excellence and widening participation | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks"} +{"code":"H2020-EU.3.2.1.","title":"Sustainable agriculture and forestry","shortTitle":"Sustainable agriculture and forestry","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry"} +{"code":"H2020-EU.3.1.7.7.","title":"Respiratory diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases"} +{"code":"H2020-EU.3.4.8.6.","title":"Cross-cutting themes and activities (CCA)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)"} +{"code":"H2020-EU.3.4.8.4.","title":"Innovation Programme 4: IT Solutions for attractive railway services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services"} +{"code":"H2020-EU.3.2.2.","title":"Sustainable and competitive agri-food sector for a safe and healthy diet","shortTitle":"Sustainable and competitive agri-food sector for a safe and healthy diet","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet"} 
+{"code":"H2020-EU.3.4.3.","title":"Global leadership for the European transport industry","shortTitle":"Global leadership for the European transport industry","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry"} +{"code":"H2020-EU.1.4.2.1.","title":"Exploiting the innovation potential of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures"} +{"code":"H2020-EU.3.3.2.3.","title":"Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use"} +{"code":"H2020-EU.3.6.3.1.","title":"Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past"} +{"code":"H2020-EU.2.1.2.2.","title":"Ensuring the safe and sustainable development and application of nanotechnologies","shortTitle":"Safe and sustainable nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Ensuring the safe and sustainable development and application of nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Safe and sustainable nanotechnologies"} +{"code":"H2020-EU.3.1.6.","title":"Health care provision and integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated 
care","classification_short":"Societal Challenges | Health | Health care provision and integrated care"} +{"code":"H2020-EU.3.4.5.9.","title":"Technology Evaluator","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Technology Evaluator","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Technology Evaluator"} +{"code":"H2020-EU.3.6.","title":"SOCIETAL CHALLENGES - Europe In A Changing World - Inclusive, Innovative And Reflective Societies","shortTitle":"Inclusive, innovative and reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies"} +{"code":"H2020-EU.3.4.8.","title":"Shift2Rail JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU","classification_short":"Societal Challenges | Transport | Shift2Rail JU"} +{"code":"H2020-EU.3.2.6.3.","title":"Sustainable biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries"} +{"code":"H2020-EU.4.a.","title":"Teaming of excellent research institutions and low performing RDI regions","shortTitle":"Teaming of research institutions and low performing regions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Teaming of excellent research institutions and low performing RDI regions","classification_short":"Spreading excellence and widening participation | Teaming of research institutions and low performing regions"} +{"code":"H2020-EU.3.1.7.4.","title":"Diabetes","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Diabetes","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Diabetes"} +{"code":"H2020-EU.3.7.2.","title":"Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","classification_short":"Societal Challenges | Secure societies | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes"} +{"code":"H2020-EU.3.1.2.","title":"Preventing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease","classification_short":"Societal Challenges | Health | Preventing disease"} +{"code":"H2020-EU.3.5.3.4.","title":"Improve societal awareness and skills on raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw 
materials | Improve societal awareness and skills on raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials"} +{"code":"H2020-EU.3.3.7.","title":"Market uptake of energy innovation - building on Intelligent Energy Europe","shortTitle":"Market uptake of energy innovation","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Market uptake of energy innovation - building on Intelligent Energy Europe","classification_short":"Societal Challenges | Energy | Market uptake of energy innovation"} +{"code":"H2020-EU.2.3.","title":"INDUSTRIAL LEADERSHIP - Innovation In SMEs","shortTitle":"Innovation in SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs","classification_short":"Industrial Leadership | Innovation in SMEs"} +{"code":"H2020-EU.2.1.1.3.","title":"Future Internet: Software, hardware, Infrastructures, technologies and services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Future Internet: Software, hardware, Infrastructures, technologies and services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Future Internet: Software, hardware, Infrastructures, technologies and services"} +{"code":"H2020-EU.3.1.5.3.","title":"Using in-silico medicine for improving disease management and prediction","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Using in-silico medicine for improving disease management and prediction","classification_short":"Societal Challenges | Health | Methods and data | Using in-silico medicine for improving disease management and prediction"} +{"code":"H2020-EU.3.6.1.1.","title":"The mechanisms to promote smart, sustainable and inclusive growth","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth"} +{"code":"H2020-EU.1.3.1.","title":"Fostering new skills by means of excellent initial training of researchers","shortTitle":"MCSA Initial training","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Fostering new skills by means of excellent initial training of researchers","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MCSA Initial training"} +{"code":"H2020-EU.3.6.2.3.","title":"Make use of the innovative, creative and productive potential of all generations","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations"} 
+{"code":"H2020-EU.3.5.1.3.","title":"Support mitigation policies, including studies that focus on impact from other sectoral policies","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies"} +{"code":"H2020-EU.3.3.1.3.","title":"Foster European Smart cities and Communities","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Foster European Smart cities and Communities","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Foster European Smart cities and Communities"} +{"code":"H2020-EU.3.1.1.","title":"Understanding health, wellbeing and disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease"} +{"code":"H2020-Euratom-1.","title":"Indirect actions","shortTitle":"","language":"en","classification":"Euratom | Indirect actions","classification_short":"Euratom | Indirect actions"} +{"code":"H2020-EU.3.5.7.","title":"FCH2 (raw materials objective)","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective)","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective)"} +{"code":"H2020-EU.3.7.3.","title":"Strengthen security through border management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Strengthen security through border management","classification_short":"Societal Challenges | Secure societies | Strengthen security through border management"} +{"code":"H2020-EU.2.1.1.2.","title":"Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing"} +{"code":"H2020-EU.3.5.5.","title":"Developing comprehensive and sustained global environmental observation and information systems","shortTitle":"Environmental observation and information systems","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Developing comprehensive and sustained global environmental observation and information systems","classification_short":"Societal Challenges | Climate and 
environment | Environmental observation and information systems"} +{"code":"H2020-EU.3.1.7.10.","title":"Cancer","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cancer","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cancer"} +{"code":"H2020-EU.3.4.8.2.","title":"Innovation Programme 2: Advanced traffic management and control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems"} +{"code":"H2020-EU.5.e.","title":"Develop the accessibility and the use of the results of publicly-funded research","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the accessibility and the use of the results of publicly-funded research","classification_short":"Science with and for Society | Develop the accessibility and the use of the results of publicly-funded research"} +{"code":"H2020-EU.3.4.4.","title":"Socio-economic and behavioural research and forward looking activities for policy making","shortTitle":"Socio-economic and behavioural research","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Socio-economic and behavioural research and forward looking activities for policy making","classification_short":"Societal Challenges | Transport | Socio-economic and behavioural research"} +{"code":"H2020-EU.3.3.2.","title":"Low-cost, low-carbon energy supply","shortTitle":"Low-cost, low-carbon energy supply","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply"} +{"code":"H2020-EU.3.4.2.2.","title":"Substantial improvements in the mobility of people and freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Substantial improvements in the mobility of people and freight","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Substantial improvements in the mobility of people and freight"} +{"code":"H2020-EU.3.5.6.","title":"Cultural heritage","shortTitle":"Cultural heritage","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage","classification_short":"Societal Challenges | Climate and environment | Cultural heritage"} +{"code":"H2020-EU.3.5.3.","title":"Ensuring the sustainable supply of non-energy and non-agricultural raw materials","shortTitle":"Supply of non-energy and non-agricultural raw materials","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials"} +{"code":"H2020-EU.2.1.5.2.","title":"Technologies enabling energy-efficient systems and energy-efficient buildings with a low 
environmental impact","shortTitle":"Technologies enabling energy-efficient systems and buildings","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and buildings"} +{"code":"H2020-EU.1.4.1.2.","title":"Integrating and opening existing national and regional research infrastructures of European interest","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest"} +{"code":"H2020-EU.3.7.8.","title":"Support the Union's external security policies including through conflict prevention and peace-building","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Support the Union's external security policies including through conflict prevention and peace-building","classification_short":"Societal Challenges | Secure societies | Support the Union's external security policies including through conflict prevention and peace-building"} +{"code":"H2020-EU.2.1.1.1.","title":"A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems"} +{"code":"H2020-EU.1.1.","title":"EXCELLENT SCIENCE - European Research Council (ERC)","shortTitle":"European Research Council (ERC)","language":"en","classification":"Excellent science | European Research Council (ERC)","classification_short":"Excellent Science | European Research Council (ERC)"} +{"code":"H2020-EU.3.4.5.6.","title":"ITD Systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Systems","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Systems"} +{"code":"H2020-EU.6.","title":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","shortTitle":"Joint Research Centre (JRC) non-nuclear direct actions","language":"en","classification":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","classification_short":"Joint Research Centre (JRC) non-nuclear direct actions"} +{"code":"H2020-EU.3.2.5.1.","title":"Climate change impact on marine ecosystems and maritime 
economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy"} +{"code":"H2020-Euratom-1.2.","title":"Contribute to the development of solutions for the management of ultimate nuclear waste","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste","classification_short":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste"} +{"code":"H2020-EU.3.1.7.11.","title":"Rare/Orphan Diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases"} +{"code":"H2020-EU.3.1.4.1.","title":"Active ageing, independent and assisted living","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Active ageing, independent and assisted living","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Active ageing, independent and assisted living"} +{"code":"H2020-Euratom-1.4.","title":"Foster radiation protection","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Foster radiation protection","classification_short":"Euratom | Indirect actions | Foster radiation protection"} +{"code":"H2020-EU.2.2.2.","title":"The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","shortTitle":"Equity facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Equity facility"} +{"code":"H2020-EU.3.3.8.","title":"FCH2 (energy objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives)","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives)"} +{"code":"H2020-EU.3.2.3.","title":"Unlocking the potential of aquatic living resources","shortTitle":"Potential of aquatic living resources","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources"} +{"code":"H2020-EU.3.5.2.3.","title":"Provide knowledge and tools for effective decision making and public engagement","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | 
Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Provide knowledge and tools for effective decision making and public engagement","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Provide knowledge and tools for effective decision making and public engagement"} +{"code":"H2020-EU.3.3.6.","title":"Robust decision making and public engagement","shortTitle":"Robust decision making and public engagement","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Robust decision making and public engagement","classification_short":"Societal Challenges | Energy | Robust decision making and public engagement"} +{"code":"H2020-EU.3.1.7.2.","title":"Osteoarthritis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis"} +{"code":"H2020-EU.2.1.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Information and Communication Technologies (ICT)","shortTitle":"Information and Communication Technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT)","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies"} +{"code":"H2020-EU.2.1.6.2.","title":"Enabling advances in space technology","shortTitle":"Enabling advances in space technology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling advances in space technology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling advances in space technology"} +{"code":"H2020-EU.2.1.1.4.","title":"Content technologies and information management: ICT for digital content, cultural and creative industries","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Content technologies and information management: ICT for digital content, cultural and creative industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Content technologies and information management: ICT for digital content, cultural and creative industries"} +{"code":"H2020-EU.2.1.5.4.","title":"New sustainable business models","shortTitle":"New sustainable business models","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | New sustainable business models"} +{"code":"H2020-EU.2.1.4.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Biotechnology","shortTitle":"Biotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | 
Biotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz deleted file mode 100644 index 620e1abfb..000000000 Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz and /dev/null differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json index 058ce8877..ca2c0f165 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json @@ -1,17 +1,17 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. 
The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. 
With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. 
When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. 
The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. 
By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. 
They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. 
These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. 
The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. 
From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"230066","id":"883730","acronym":"SOLSPACE","status":"SIGNED","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG","frameworkProgramme":"H2020","title":"Enhancing Global Clean Energy Services Using Orbiting Solar Reflectors", "startDate":"2021-03-01","endDate":"2025-11-30","projectUrl":"","objective":"fake", "totalCost":"2496392","ecMaxContribution":"2496392","call":"ERC-2019-ADG","fundingScheme":"ERC-ADG","coordinator":"UNIVERSITY OF GLASGOW","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4"} +{"id":"886776","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3"} +{"id":"887259","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"883730","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json index edf83fbc8..ae1b7cb82 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -1,16 +1,16 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. 
\nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. 
The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. 
Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. 
The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. 
The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. 
Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). 
By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. 
Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. 
Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. 
They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. 
The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"954782","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020"} +{"id":"101003374","programme":"H2020-EU.4.","topics":"WF-02-2019"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz index 71f132ad1..5672d6044 100644 Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz differ diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 2caa66db4..38ffd28fe 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable { private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " + "target_type, 
unnest(original_target_objects) as target, " + "relationship FROM blacklist WHERE status = 'ACCEPTED'"; @@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable { dbPassword)) { log.info("Processing blacklist..."); - rbl.execute(query, rbl::processBlacklistEntry); + rbl.execute(QUERY, rbl::processBlacklistEntry); } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); @@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable { return Arrays.asList(direct, inverse); - } catch (final Exception e) { + } catch (final SQLException e) { throw new RuntimeException(e); } } @@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable { public ReadBlacklistFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { @@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable { try { writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); - } catch (final Exception e) { + } catch (final IOException e) { throw new RuntimeException(e); } } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 91bcb9d1c..38ef63d27 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob { .map((MapFunction, Relation>) c -> { Relation ir = c._1(); Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; - } + if (obl.isPresent() && ir.equals(obl.get())) { + return null; } return ir; }, Encoders.bean(Relation.class)) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 585848589..058ea271c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -62,7 +62,7 @@ public class BlackListTest { } @Test - public void noRemoveTest() throws Exception { + void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -89,7 +89,7 @@ public class BlackListTest { } @Test - public void removeNoMergeMatchTest() throws Exception { + void removeNoMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -128,7 +128,7 @@ public class BlackListTest { } @Test - public void removeMergeMatchTest() throws Exception { + void 
removeMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 429eb7d11..584438d44 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; +import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public class EventFactory { - private final static String PRODUCER_ID = "OpenAIRE"; + private static final String PRODUCER_ID = "OpenAIRE"; - private final static String[] DATE_PATTERNS = { + private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" }; + private EventFactory() { + } + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final Event res = new Event(); @@ -61,7 +66,7 @@ public class EventFactory { map.setTargetResultId(target.getOpenaireId()); final List titles = target.getTitles(); - if (titles.size() > 0) { + if (!titles.isEmpty()) { map.setTargetResultTitle(titles.get(0)); } @@ -70,8 +75,12 @@ public class EventFactory { map.setTargetDateofacceptance(date); } - map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); - map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); + map + .setTargetSubjects( + target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList())); + map + .setTargetAuthors( + target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList())); // PROVENANCE INFO map.setTrust(updateInfo.getTrust()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java index 89fc2e703..f0aa6491f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java @@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.TypedColumn; import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; @@ -88,8 +84,7 @@ class CountAggregator extends Aggregator, Tuple2 merge(final Tuple2 arg0, final Tuple2 arg1) { - final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); - return new Tuple2<>(s, arg0._2 + arg1._2); + 
return doMerge(arg0, arg1); } @Override @@ -99,6 +94,10 @@ class CountAggregator extends Aggregator, Tuple2 reduce(final Tuple2 arg0, final Tuple2 arg1) { + return doMerge(arg0, arg1); + } + + private Tuple2 doMerge(final Tuple2 arg0, final Tuple2 arg1) { final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return new Tuple2<>(s, arg0._2 + arg1._2); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java index 05ff2aa38..0fbc763e0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa; +import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -98,7 +99,6 @@ public class IndexEventSubsetJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -114,11 +114,11 @@ public class IndexEventSubsetJob { log.info("*** Deleting old events"); final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000); - log.info("*** Deleted events: " + message); + log.info("*** Deleted events: {}", message); } - private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce..e8ef5dd3e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -33,11 +33,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; -import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.model.*; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -89,9 +85,9 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: " + subscriptions.size()); + log.info("Number of subscriptions: {}", subscriptions.size()); - if (subscriptions.size() > 0) { + if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils @@ -106,7 +102,6 @@ public class IndexNotificationsJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 
10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -122,7 +117,7 @@ public class IndexNotificationsJob { log.info("*** Deleting old notifications"); final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); + log.info("*** Deleted notifications: {}", message); log.info("*** sendNotifications (emails, ...)"); sendNotifications(brokerApiBaseUrl, startTime - 1000); @@ -174,28 +169,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && !conditions + if (conditions.containsKey("targetDateofacceptance") && conditions .get("targetDateofacceptance") .stream() - .anyMatch( + .noneMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && !conditions + && conditions .get("targetResultTitle") .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && !conditions + && conditions .get("targetAuthors") .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -207,7 +202,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -222,7 +217,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java index 380a689e4..0c74d8a6d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java @@ -7,6 +7,7 @@ import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; @@ -61,7 +62,7 @@ public class IndexOnESJob { final JavaRDD inputRdd = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map(IndexOnESJob::eventAsJsonString, Encoders.STRING()) + .map((MapFunction) IndexOnESJob::eventAsJsonString, Encoders.STRING()) .javaRDD(); final Map esCfg = new HashMap<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java 
b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index e061c0d3b..b5c891bb8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -77,9 +77,8 @@ public class PartitionEventsByDsIdJob { } log.info("validOpendoarIds: {}", validOpendoarIds); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - - ClusterUtils + runWithSparkSession( + conf, isSparkSessionManaged, spark -> ClusterUtils .readPath(spark, eventsPath, Event.class) .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) .filter((FilterFunction) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) @@ -92,9 +91,7 @@ public class PartitionEventsByDsIdJob { .partitionBy("group") .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(partitionPath); - - }); + .json(partitionPath)); renameSubDirs(partitionPath); } @@ -102,14 +99,14 @@ public class PartitionEventsByDsIdJob { private static void renameSubDirs(final String path) throws IOException { final FileSystem fs = FileSystem.get(new Configuration()); - log.info("** Renaming subdirs of " + path); + log.info("** Renaming subdirs of {}", path); for (final FileStatus fileStatus : fs.listStatus(new Path(path))) { if (fileStatus.isDirectory()) { final Path oldPath = fileStatus.getPath(); final String oldName = oldPath.getName(); if (oldName.contains("=")) { final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, "=")); - log.info(" * " + oldPath.getName() + " -> " + newPath.getName()); + log.info(" * {} -> {}", oldPath.getName(), newPath.getName()); fs.rename(oldPath, newPath); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java index 61ab5e250..2a247b7aa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java @@ -105,7 +105,7 @@ public class PrepareRelatedDatasourcesJob { .filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getId())) .filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference()) .map( - (MapFunction) r -> DatasourceRelationsAccumulator.calculateTuples(r), + (MapFunction) DatasourceRelationsAccumulator::calculateTuples, Encoders.bean(DatasourceRelationsAccumulator.class)) .flatMap( (FlatMapFunction>) acc -> acc diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index fba82aa8c..87fed7db7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -26,7 +26,7 @@ public abstract class UpdateMatcher { private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public UpdateMatcher(final int maxNumber, final Function topicFunction, + protected UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, 
final Function highlightToStringFunction) { this.maxNumber = maxNumber; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index 2f73a2448..88ad48178 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { - public AbstractEnrichMissingDataset(final Topic topic) { + protected AbstractEnrichMissingDataset(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedDataset::getOpenaireId); } protected abstract boolean filterByType(String relType); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index ab2735f2a..440602772 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 85086a6df..2e523da2f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -18,7 +18,7 @@ public class EnrichMoreProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override @@ -32,7 +32,7 @@ public class EnrichMoreProject extends UpdateMatcher { final Set existingProjects = target .getProjects() .stream() - .map(p -> p.getOpenaireId()) + .map(OaBrokerProject::getOpenaireId) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 7ba3e5e02..a709eea30 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -14,11 +14,11 
@@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { - public AbstractEnrichMissingPublication(final Topic topic) { + protected AbstractEnrichMissingPublication(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedPublication::getOpenaireId); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index a638024bc..a75666027 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -16,7 +16,7 @@ public class EnrichMissingSoftware super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a6cd34359..ec340b42f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -18,7 +18,7 @@ public class EnrichMoreSoftware extends UpdateMatcher { super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index e834d1dde..125eac862 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -20,7 +20,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher { super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), - aut -> aut.getOrcid()); + OaBrokerAuthor::getOrcid); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 4e4003890..f32cec90d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -23,7 +23,7 @@ public class EnrichMissingPid extends UpdateMatcher { protected List findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { - if (target.getPids().size() > 0) { + if (!target.getPids().isEmpty()) { return Arrays.asList(); } diff --git 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index cb3ea5464..f07bbd52f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -35,7 +35,7 @@ public class EnrichMissingSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMissingSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMissingSubject extends UpdateMatcher { final Set existingSubject = target .getSubjects() .stream() - .map(s -> subjectAsString(s)) + .map(EnrichMissingSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 46f6fa80c..585531095 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -33,7 +33,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { .getInstances() .stream() .filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS)) - .map(i -> i.getUrl()) + .map(OaBrokerInstance::getUrl) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 609437b9d..a98b96b99 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -18,7 +18,7 @@ public class EnrichMorePid extends UpdateMatcher { super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), - pid -> pidAsString(pid)); + EnrichMorePid::pidAsString); } @Override @@ -32,7 +32,7 @@ public class EnrichMorePid extends UpdateMatcher { final Set existingPids = target .getPids() .stream() - .map(pid -> pidAsString(pid)) + .map(EnrichMorePid::pidAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 1f6edf96e..b62b509c7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -35,7 +35,7 @@ public class EnrichMoreSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMoreSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMoreSubject extends UpdateMatcher { final Set existingSubjects = target .getSubjects() .stream() - .map(pid -> subjectAsString(pid)) + 
.map(EnrichMoreSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 2055a014e..790ca4e61 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -12,6 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class BrokerConstants { + private BrokerConstants() { + } + public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index 7c4ca1d22..2e9c03990 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -24,6 +24,9 @@ public class ClusterUtils { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private ClusterUtils() { + } + public static void createDirIfMissing(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 6f0a52244..bc37203d3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -13,8 +14,6 @@ import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Function; - import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; @@ -46,6 +45,9 @@ public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); + private ConversionUtils() { + } + public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); @@ -69,7 +71,7 @@ public class ConversionUtils { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } - public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } @@ -100,7 +102,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { + public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } @@ -142,12 +144,12 @@ public class ConversionUtils { final String pids = author.getPid() != null ? 
author .getPid() .stream() - .filter(pid -> pid != null) + .filter(Objects::nonNull) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) - .map(pid -> pid.getValue()) - .map(pid -> cleanOrcid(pid)) + .map(StructuredProperty::getValue) + .map(ConversionUtils::cleanOrcid) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; @@ -187,7 +189,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { + public static OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } @@ -206,14 +208,14 @@ public class ConversionUtils { res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { - log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); + log.error("Error in record {}: invalid fundingtree: {}", p.getId(), ftree); } } return res; } - public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { + public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } @@ -228,7 +230,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { + public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } @@ -241,7 +243,7 @@ public class ConversionUtils { } private static String first(final List list) { - return list != null && list.size() > 0 ? list.get(0) : null; + return list != null && !list.isEmpty() ? 
list.get(0) : null; } private static String kvValue(final KeyValue kv) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java index c693be93c..658a42ac1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple3; @@ -39,7 +41,7 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set collectedFromSet = r .getCollectedfrom() .stream() - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .collect(Collectors.toSet()); @@ -47,10 +49,10 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set hostedBySet = r .getInstance() .stream() - .map(i -> i.getHostedby()) + .map(Instance::getHostedby) .filter(Objects::nonNull) .filter(kv -> !StringUtils.equalsIgnoreCase(kv.getValue(), "Unknown Repository")) - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .filter(id -> !collectedFromSet.contains(id)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 103751f95..b2214e07e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -41,8 +41,6 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; public class EventFinder { - private static final Logger log = LoggerFactory.getLogger(EventFinder.class); - private static final List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); @@ -72,6 +70,9 @@ public class EventFinder { matchers.add(new EnrichMissingDatasetIsSupplementedBy()); } + private EventFinder() { + } + public static EventGroup generateEvents(final ResultGroup results, final Set dsIdWhitelist, final Set dsIdBlacklist, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index adb1c753b..cf3562193 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -12,6 +12,9 @@ public class SubscriptionUtils { private static final long ONE_DAY = 86_400_000; + private SubscriptionUtils() { + } + public static boolean verifyListSimilar(final List list, final String value) { return list.stream().anyMatch(s -> verifySimilar(s, value)); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 72fe1b204..a49801f32 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -30,7 +30,9 @@ public class TrustUtils { } catch (final IOException e) { log.error("Error loading dedupConfig, e"); } + } + private TrustUtils() { } protected static float calculateTrust(final OaBrokerMainEntity r1, final OaBrokerMainEntity r2) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 5a9cb5e09..d29414e52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -88,14 +88,14 @@ public final class UpdateInfo { .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getName()) + .map(OaBrokerRelatedDatasource::getName) .findFirst() .orElse(""); final String provType = getSource() .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getType()) + .map(OaBrokerRelatedDatasource::getType) .findFirst() .orElse(""); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 8fa95abe5..45bfc785f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -72,7 +73,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test @@ -127,7 +128,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 77a19af4c..550ded9f4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -32,7 +33,7 @@ class EnrichMissingPublicationDateTest { final OaBrokerMainEntity target = new OaBrokerMainEntity(); source.setPublicationdate("2018"); final List list = matcher.findDifferences(source, target); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test diff --git 
a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 974baa28b..a8bc03e31 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,67 +9,67 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; @Test - public void rescaleTest_1() { + void rescaleTest_1() { verifyValue(-0.3, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_2() { + void rescaleTest_2() { verifyValue(0.0, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_3() { + void rescaleTest_3() { verifyValue(0.5, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_4() { + void rescaleTest_4() { verifyValue(0.95, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_5() { + void rescaleTest_5() { verifyValue(0.96, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_6() { + void rescaleTest_6() { verifyValue(0.97, 0.3f); } @Test - public void rescaleTest_7() { + void rescaleTest_7() { verifyValue(0.98, 0.45f); } @Test - public void rescaleTest_8() { + void rescaleTest_8() { verifyValue(0.99, 0.6f); } @Test - public void rescaleTest_9() { + void rescaleTest_9() { verifyValue(1.00, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_10() { + void rescaleTest_10() { verifyValue(1.01, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_11() { + void rescaleTest_11() { verifyValue(2.00, BrokerConstants.MAX_TRUST); } @Test - public void test() throws Exception { + void test() { final OaBrokerMainEntity r1 = new OaBrokerMainEntity(); r1.getTitles().add("D-NET Service Package: Data Import"); r1.getPids().add(new OaBrokerTypedValue("doi", "123")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 647a1b9c8..6a9b21b00 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -20,6 +20,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,23 +44,26 @@ abstract class AbstractSparkAction implements Serializable { protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public ArgumentApplicationParser parser; // parameters for the spark action - public SparkSession spark; // the spark session + public final ArgumentApplicationParser parser; // parameters for the spark action + public final SparkSession spark; // the spark session - public AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { + protected AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { this.parser = parser; this.spark = spark; } public List 
getConfigurations(ISLookUpService isLookUpService, String orchestrator) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); @@ -93,7 +97,7 @@ abstract class AbstractSparkAction implements Serializable { } abstract void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException; + throws DocumentException, IOException, ISLookUpException, SAXException; protected static SparkSession getSparkSession(SparkConf conf) { return SparkSession.builder().config(conf).getOrCreate(); @@ -139,9 +143,7 @@ abstract class AbstractSparkAction implements Serializable { c -> c .stream() .filter(Objects::nonNull) - .filter(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())) - .findFirst() - .isPresent()) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) .orElse(false); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java index 558c7c440..9d767c4d2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java @@ -23,6 +23,9 @@ public class DatePicker { private static final int YEAR_LB = 1300; private static final int YEAR_UB = Year.now().getValue() + 5; + private DatePicker() { + } + public static Field pick(final Collection dateofacceptance) { final Map frequencies = dateofacceptance @@ -61,7 +64,7 @@ public class DatePicker { .entrySet() .stream() .filter(e -> e.getValue() >= acceptThreshold) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); // cannot find strong majority diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 5ba6f6e6d..d65853aff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -10,14 +10,11 @@ import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -25,11 +22,12 @@ import scala.Tuple2; public class DedupRecordFactory { - private static final Logger log = 
LoggerFactory.getLogger(DedupRecordFactory.class); - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private DedupRecordFactory() { + } + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, @@ -67,7 +65,7 @@ public class DedupRecordFactory { value._1()._1(), value._2()._2()), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) .groupByKey( - (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) + (MapFunction, String>) Tuple2::_1, Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, values) -> entityMerger(key, values, ts, dataInfo, clazz), @@ -91,7 +89,7 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 5806e9fa4..d79d24653 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -10,6 +10,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Sets; @@ -25,6 +26,9 @@ public class DedupUtility { public static final String OPENORGS_ID_PREFIX = "openorgs____"; public static final String CORDA_ID_PREFIX = "corda"; + private DedupUtility() { + } + public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -92,14 +96,16 @@ public class DedupUtility { } public static List getConfigurations(String isLookUpUrl, String orchestrator) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); final List configurations = new ArrayList<>(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 5506b5470..ea738836b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; 
+import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -33,9 +34,11 @@ public class DispatchEntitiesSparkJob { String jsonConfiguration = IOUtils .toString( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")); + Objects + .requireNonNull( + DispatchEntitiesSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -54,6 +57,7 @@ public class DispatchEntitiesSparkJob { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..a19f86380 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -42,7 +42,7 @@ public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private final static String ID_JPATH = "$.id"; + private static final String ID_JPATH = "$.id"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -92,7 +92,7 @@ public class GroupEntitiesSparkJob { spark .read() .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) s -> parseOaf(s), Encoders.kryo(OafEntity.class)) + .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .agg(aggregator) @@ -188,7 +188,7 @@ public class GroupEntitiesSparkJob { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index dd9b16790..81cd30f88 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -15,13 +15,13 @@ public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) public static String generate(List> pids, String defaultID) { - if (pids == null || pids.size() == 0) + if (pids == null || pids.isEmpty()) return defaultID; Identifier bp = pids .stream() .min(Identifier::compareTo) - .get(); + .orElseThrow(() -> new IllegalStateException("unable to generate id")); String prefix = substringBefore(bp.getOriginalID(), "|"); String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java index 1e13485e5..c9c9dd8fe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java @@ -9,7 +9,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -63,7 +63,7 @@ public class SparkBlockStats extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 0aaa1e662..93027e99a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -6,7 +6,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,8 +16,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -78,7 +75,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction { saveParquet(rawRels, outputPath, SaveMode.Append); - log.info("Copied " + rawRels.count() + " Similarity Relations"); + log.info("Copied {} Similarity Relations", rawRels.count()); } private boolean filterOpenorgsRels(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9ece43891..bf0b7f687 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -2,34 +2,21 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import 
org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.Dataset; -import org.dom4j.DocumentException; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java index b41507e95..6989ec54b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.EntityType; @@ -54,7 +55,7 @@ public class SparkCreateDedupRecord extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String isLookUpUrl = parser.get("isLookUpUrl"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index bfc605039..95e3dff28 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -24,6 +24,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; @@ -76,7 +77,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String workingPath = parser.get("workingPath"); @@ -161,11 +162,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private ConnectedComponent 
generateID(String key, Iterator> values) { - List> identifiers = Lists.newArrayList(values).stream().map(v -> { - T entity = v._2(); - Identifier identifier = Identifier.newInstance(entity); - return identifier; - }).collect(Collectors.toList()); + List> identifiers = Lists + .newArrayList(values) + .stream() + .map(v -> Identifier.newInstance(v._2())) + .collect(Collectors.toList()); String rootID = IdGenerator.generate(identifiers, key); @@ -235,7 +236,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { info.setProvenanceaction(provenanceAction); // TODO calculate the trust value based on the similarity score of the elements in the CC - // info.setTrust(); r.setDataInfo(info); return r; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index df3db7add..8e5e9fd69 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -18,9 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 884967364..f89f634b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -26,8 +26,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.FieldListImpl; -import eu.dnetlib.pace.model.FieldValueImpl; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; @@ -56,7 +54,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); @@ -110,9 +108,6 @@ public class SparkCreateSimRels extends 
AbstractSparkAction { Encoders.bean(Relation.class)); saveParquet(simRels, outputPath, SaveMode.Overwrite); - - log.info("Generated " + simRels.count() + " Similarity Relations"); - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java index 657d5a832..d12048b02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java @@ -6,10 +6,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; @@ -23,9 +19,9 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -84,8 +80,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath); @@ -115,7 +112,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(r -> filterRels(r, "organization")) + .filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization))) // take the worst id of the diffrel: .mapToPair(rel -> { if (DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) > 0) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index 08b39793e..61325ab50 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import 
eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -32,6 +33,7 @@ import scala.Tuple3; public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class); + public static final String GROUP_PREFIX = "group::"; public SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) { super(parser, spark); @@ -41,7 +43,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser.parseArgument(args); @@ -81,8 +83,9 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset relations = createRelations(spark, mergeRelPath, relationPath, entityPath); @@ -168,7 +171,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .map(g -> Lists.newArrayList(g._2())) .filter(l -> l.size() > 1) .flatMap(l -> { - String groupId = "group::" + UUID.randomUUID(); + String groupId = GROUP_PREFIX + UUID.randomUUID(); List ids = sortIds(l); // sort IDs by type List, String>> rels = new ArrayList<>(); String source = ids.get(0); @@ -192,7 +195,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .collect(Collectors.toList()))) // : take only relations with only the group_id, it // means they are correct. If the diffRel is present the relation has to be removed - .filter(g -> g._2().size() == 1 && g._2().get(0).contains("group::")) + .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX)) .map( t -> new Tuple3<>( t._1().split("@@@")[0], @@ -255,7 +258,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { // Sort IDs basing on the type. Priority: 1) openorgs, 2)corda, 3)alphabetic public static List sortIds(List ids) { - ids.sort((o1, o2) -> DedupUtility.compareOpenOrgIds(o1, o2)); + ids.sort(DedupUtility::compareOpenOrgIds); return ids; } @@ -289,9 +292,10 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List> rels = new ArrayList<>(); for (String id1 : g._2()) { for (String id2 : g._2()) { - if (!id1.equals(id2)) - if (id1.contains(DedupUtility.OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh")) - rels.add(new Tuple2<>(id1, id2)); + if (!id1.equals(id2) && id1.contains(DedupUtility.OPENORGS_ID_PREFIX) + && !id2.contains("openorgsmesh")) { + rels.add(new Tuple2<>(id1, id2)); + } } } return rels.iterator(); @@ -310,7 +314,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getWebsiteurl() != null ? 
r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1(), + GROUP_PREFIX + r._1()._1(), structuredPropertyListToString(r._2()._2().getPid()), parseECField(r._2()._2().getEclegalbody()), parseECField(r._2()._2().getEclegalperson()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 220b0f483..0fa41bd6d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -40,7 +40,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPropagateRelation.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); @@ -113,7 +113,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { .join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), Encoders.STRING()) .agg(new RelationAggregator().toColumn()) - .map((MapFunction, Relation>) t -> t._2(), Encoders.bean(Relation.class)); + .map((MapFunction, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); } // redirect the relations to the dedupID @@ -163,7 +163,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { private FilterFunction getRelationFilterFunction() { return r -> StringUtils.isNotBlank(r.getSource()) || StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelClass()) || + StringUtils.isNotBlank(r.getRelType()) || StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getRelClass()); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index fdef7f77d..49021ab58 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -13,7 +14,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -26,8 +26,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -72,83 
+74,76 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // for each entity - ModelSupport.entityTypes - .forEach( - (type, clazz) -> { - final String outputPath = dedupGraphPath + "/" + type; - removeOutputDir(spark, outputPath); - final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); - if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + final EntityType type = e.getKey(); + final Class clazz = e.getValue(); + final String outputPath = dedupGraphPath + "/" + type; + removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - if (mergeRelExists(workingPath, type.toString())) { + if (mergeRelExists(workingPath, type.toString())) { - final String mergeRelPath = DedupUtility - .createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + .createDedupRecordPath(workingPath, "*", type.toString()); - final Dataset rel = spark - .read() - .load(mergeRelPath) - .as(Encoders.bean(Relation.class)); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + .map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); - - } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); - } - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); + } + } } - public boolean mergeRelExists(String basePath, String entity) { + public boolean 
mergeRelExists(String basePath, String entity) throws IOException { boolean result = false; - try { - FileSystem fileSystem = FileSystem.get(new Configuration()); - FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); + FileSystem fileSystem = FileSystem.get(new Configuration()); + FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); - for (FileStatus fs : fileStatuses) { - if (fs.isDirectory()) - if (fileSystem - .exists( - new Path(DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)))) - result = true; + for (FileStatus fs : fileStatuses) { + final Path mergeRelPath = new Path( + DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)); + if (fs.isDirectory() && fileSystem.exists(mergeRelPath)) { + result = true; } - - return result; - } catch (IOException e) { - throw new RuntimeException(e); } + + return result; } private static String updateDeletedByInference( diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java new file mode 100644 index 000000000..7d91e47cc --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -0,0 +1,154 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.MapDocument; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; + +public class SparkWhitelistSimRels extends AbstractSparkAction { + + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + + private static final String WHITELIST_SEPARATOR = "####"; + + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); + + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } + + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { + + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + 
final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); + + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + // file format: source####target + Dataset> whiteListRels = spark + .createDataset( + sc + .textFile(whiteListPath) + // check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); + + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + + Dataset> entities = spark + .createDataset( + sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + }) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset whiteListSimRels = whiteListRels2 + .map( + (MapFunction, Relation>) r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class)); + + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } + + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); + + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java new file mode 100644 index 000000000..d094fb72b --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java @@ -0,0 +1,117 @@ + +package 
eu.dnetlib.dhp.oa.dedup; + +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.IOUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class UpdateOpenorgsJob { + + private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); + parser.parseArgument(args); + + final String apiUrl = parser.get("apiUrl"); + final int delay = Integer.parseInt(parser.get("delay")); + + log.info("apiUrl: '{}'", apiUrl); + log.info("delay: '{}'", delay); + + APIResponse res = httpCall(apiUrl); + while (res != null && res.getStatus().equals(ImportStatus.RUNNING)) { + TimeUnit.MINUTES.sleep(delay); + res = httpCall(apiUrl + "/status"); + } + + if (res == null) { + log.error("Openorgs Update FAILED: No response"); + throw new RuntimeException("Openorgs Update FAILED: No response"); + } + + if (res.getStatus() == null || !res.getStatus().equals(ImportStatus.SUCCESS)) { + log.error("Openorgs Update FAILED: '{}' - '{}'", res.getStatus(), res.getMessage()); + throw new RuntimeException(res.getMessage()); + } + + } + + private static APIResponse httpCall(final String url) throws Exception { + final HttpGet req = new HttpGet(url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = client.execute(req)) { + final String s = IOUtils.toString(response.getEntity().getContent()); + return (new ObjectMapper()).readValue(s, APIResponse.class); + } + } + } + +} + +class APIResponse { + private String id; + private Long dateStart; + private Long dateEnd; + private ImportStatus status; + private String message; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Long getDateStart() { + return dateStart; + } + + public void setDateStart(Long dateStart) { + this.dateStart = dateStart; + } + + public Long getDateEnd() { + return dateEnd; + } + + public void setDateEnd(Long dateEnd) { + this.dateEnd = dateEnd; + } + + public ImportStatus getStatus() { + return status; + } + + public void setStatus(ImportStatus status) { + this.status = status; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } +} + +enum ImportStatus { + SUCCESS, FAILED, RUNNING, NOT_LAUNCHED, NOT_YET_STARTED +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java index 3a986a9dd..3e564052e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java @@ -3,29 +3,15 @@ package eu.dnetlib.dhp.oa.dedup.graph; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; 
-import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; -import org.apache.spark.api.java.function.MapFunction; import org.codehaus.jackson.annotate.JsonIgnore; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.oa.dedup.IdGenerator; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.PaceException; public class ConnectedComponent implements Serializable { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index e821d7ef5..a25a853ef 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; -public class Identifier implements Serializable, Comparable { +public class Identifier implements Serializable, Comparable> { public static final String DATE_FORMAT = "yyyy-MM-dd"; public static final String BASE_DATE = "2000-01-01"; @@ -29,8 +29,8 @@ public class Identifier implements Serializable, Comparable // cached date value private Date date = null; - public static Identifier newInstance(T entity) { - return new Identifier(entity); + public static Identifier newInstance(T entity) { + return new Identifier<>(entity); } public Identifier(T entity) { @@ -88,7 +88,7 @@ public class Identifier implements Serializable, Comparable } @Override - public int compareTo(Identifier i) { + public int compareTo(Identifier i) { // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) // alphabetical order of the originalID diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml index 30442406c..6947019e8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml @@ -28,6 +28,11 @@ dbPwd password to access the OpenOrgs database + + dbConnections + 10 + number of connections to the postgres db + workingPath path for the working directory @@ -223,7 +228,7 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} @@ -254,19 +259,24 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} - - ${jobTracker} - ${nameNode} - /usr/bin/curl - ${apiUrl} - + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.oa.dedup.UpdateOpenorgsJob + --apiUrl${apiUrl} + --delay5 + diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index 342d83e8e..02fdd8431 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -20,6 +20,10 @@ workingPath path for the working directory + + whiteListPath + path for the whitelist of similarity relations + dedupGraphPath path for the output graph @@ -130,6 +134,34 @@ --workingPath${workingPath} --numPartitions8000 + + + + + + + yarn + cluster + Add Whitelist Similarity Relations + eu.dnetlib.dhp.oa.dedup.SparkWhitelistSimRels + dhp-dedup-openaire-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --graphBasePath${graphBasePath} + --isLookUpUrl${isLookUpUrl} + --actionSetId${actionSetId} + --workingPath${workingPath} + --whiteListPath${whiteListPath} + --numPartitions8000 + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json new file mode 100644 index 000000000..5ca4a3dba --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "api", + "paramLongName": "apiUrl", + "paramDescription": "the url of the API", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "delay", + "paramDescription": "delay for the HTTP call in minutes", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json new file mode 100644 index 000000000..0a5cad7c4 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName": "la", + "paramLongName": "isLookUpUrl", + "paramDescription": "address for the LookUp", + "paramRequired": true + }, + { + "paramName": "asi", + "paramLongName": "actionSetId", + "paramDescription": "action set identifier (name of the orchestrator)", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "graphBasePath", + "paramDescription": "the base path of the raw graph", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + "paramDescription": "path of the working directory", + "paramRequired": true + }, + { + "paramName": "np", + "paramLongName": "numPartitions", + "paramDescription": "number of partitions for the similarity relations intermediate phases", + "paramRequired": false + }, + { + "paramName": "wl", + "paramLongName": "whiteListPath", + "paramDescription": "whitelist file path for the addition of custom 
simrels", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java index 7c58c375a..daea29a07 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java @@ -10,12 +10,12 @@ import org.junit.jupiter.api.Test; import com.clearspring.analytics.util.Lists; -public class DatePickerTest { +class DatePickerTest { Collection dates = Lists.newArrayList(); @Test - public void testPickISO() { + void testPickISO() { dates.add("2016-01-01T12:00:00Z"); dates.add("2016-06-16T12:00:00Z"); dates.add("2020-01-01T12:00:00Z"); @@ -24,7 +24,7 @@ public class DatePickerTest { } @Test - public void testPickSimple() { + void testPickSimple() { dates.add("2016-01-01"); dates.add("2016-06-16"); dates.add("2020-01-01"); @@ -33,7 +33,7 @@ public class DatePickerTest { } @Test - public void testPickFrequent() { + void testPickFrequent() { dates.add("2016-02-01"); dates.add("2016-02-01"); dates.add("2016-02-01"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 80154fbb7..e86f91f99 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class EntityMergerTest implements Serializable { +class EntityMergerTest implements Serializable { private List> publications; private List> publications2; @@ -54,7 +54,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +69,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -125,7 +125,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -137,7 +137,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -147,7 +147,7 @@ public class EntityMergerTest implements Serializable { 
} @Test - public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -157,7 +157,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 1a279fac7..2d6637882 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -56,7 +56,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest1() { + void generateIdTest1() { String id1 = IdGenerator.generate(bestIds, "50|defaultID"); System.out @@ -66,7 +66,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest2() { + void generateIdTest2() { String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); @@ -82,7 +82,7 @@ public class IdGeneratorTest { } @Test - public void generateIdOrganizationTest() { + void generateIdOrganizationTest() { String id1 = IdGenerator.generate(bestIdsOrg, "20|defaultID"); assertEquals("20|openorgs____::599c15a70fcb03be6ba08f75f14d6076", id1); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bf4913056..549988767 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -5,9 +5,11 @@ import static java.nio.file.Files.createTempDirectory; import static org.apache.spark.sql.functions.count; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.lenient; import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; @@ -56,6 +58,10 @@ public class SparkDedupTest implements Serializable { private static String testOutputBasePath; private static String testDedupGraphBasePath; private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; @BeforeAll public static void cleanUp() throws IOException, URISyntaxException { @@ -72,6 +78,12 @@ public class SparkDedupTest implements Serializable { .toAbsolutePath() .toString(); + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + FileUtils.deleteDirectory(new File(testOutputBasePath)); FileUtils.deleteDirectory(new 
File(testDedupGraphBasePath)); @@ -148,7 +160,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -196,14 +208,92 @@ public class SparkDedupTest implements Serializable { assertEquals(3082, orgs_simrel); assertEquals(7036, pubs_simrel); - assertEquals(344, sw_simrel); + assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6750, orp_simrel); } @Test @Order(2) - public void cutMergeRelsTest() throws Exception { + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + // entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + // check if the first relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + // check if the second relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -298,8 +388,8 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(3) - public void createMergeRelsTest() throws Exception { + @Order(4) + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -347,15 +437,15 @@ public class SparkDedupTest implements Serializable { assertEquals(1272, orgs_mergerel); assertEquals(1438, pubs_mergerel); - assertEquals(288, sw_mergerel); + assertEquals(286, 
sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(718, orp_mergerel); } @Test - @Order(4) - public void createDedupRecordTest() throws Exception { + @Order(5) + void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -395,14 +485,14 @@ public class SparkDedupTest implements Serializable { assertEquals(85, orgs_deduprecord); assertEquals(65, pubs_deduprecord); - assertEquals(51, sw_deduprecord); + assertEquals(49, sw_deduprecord); assertEquals(97, ds_deduprecord); assertEquals(89, orp_deduprecord); } @Test - @Order(5) - public void updateEntityTest() throws Exception { + @Order(6) + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -480,7 +570,7 @@ public class SparkDedupTest implements Serializable { assertEquals(838, organizations); assertEquals(100, projects); assertEquals(100, datasource); - assertEquals(200, softwares); + assertEquals(198, softwares); assertEquals(389, dataset); assertEquals(517, otherresearchproduct); @@ -517,8 +607,8 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(6) - public void propagateRelationTest() throws Exception { + @Order(7) + void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -536,7 +626,7 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4862, relations); + assertEquals(4860, relations); // check deletedbyinference final Dataset mergeRels = spark @@ -567,8 +657,8 @@ public class SparkDedupTest implements Serializable { } @Test - @Order(7) - public void testRelations() throws Exception { + @Order(8) + void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 97cfab118..9312d83b1 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -67,7 +67,7 @@ public class SparkOpenorgsDedupTest implements Serializable { public static void cleanUp() throws IOException, URISyntaxException { testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) + .get(SparkOpenorgsDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) .toFile() .getAbsolutePath(); testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") @@ -101,7 +101,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); @@ -110,14 +110,14 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); } @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { 
ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -148,7 +148,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(2) - public void copyOpenorgsSimRels() throws Exception { + void copyOpenorgsSimRels() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -177,7 +177,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -230,11 +230,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(4) - public void prepareOrgRelsTest() throws Exception { + void prepareOrgRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser @@ -313,11 +313,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(5) - public void prepareNewOrgsTest() throws Exception { + void prepareNewOrgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareNewOrgs.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); parser diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 606dd9e5b..2349ffebe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.lenient; import java.io.File; @@ -12,8 +11,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.Collections; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -32,9 +29,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -110,7 +104,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(1) - public void copyOpenorgsMergeRelTest() throws Exception { + void copyOpenorgsMergeRelTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -143,7 +137,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(2) - public void createOrgsDedupRecordTest() throws Exception { + void createOrgsDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -176,7 +170,7 @@ public class 
SparkOpenorgsProvisionTest implements Serializable { @Test @Order(3) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -216,7 +210,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(4) - public void copyRelationsNoOpenorgsTest() throws Exception { + void copyRelationsNoOpenorgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -239,7 +233,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(5) - public void propagateRelationsTest() throws Exception { + void propagateRelationsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 31de8d951..1ba2c717c 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -41,7 +41,7 @@ public class SparkStatsTest implements Serializable { private static final String testActionSetId = "test-orchestrator"; @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void beforeAll() throws IOException, URISyntaxException { testGraphBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) @@ -73,7 +73,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); @@ -82,7 +82,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); @@ -91,7 +91,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); @@ -100,7 +100,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); @@ -109,7 +109,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); @@ -118,18 +118,18 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); } @Test - public void createBlockStatsTest() throws Exception { + void createBlockStatsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/createBlockStats_parameters.json"))); parser @@ -168,10 +168,15 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(549, 
orgs_blocks); - assertEquals(299, pubs_blocks); + assertEquals(477, orgs_blocks); + assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); - assertEquals(186, ds_blocks); - assertEquals(170, orp_blocks); + assertEquals(191, ds_blocks); + assertEquals(171, orp_blocks); + } + + @AfterAll + public static void tearDown() { + spark.close(); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 1759180d2..7348a3bd2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.dedup.jpath; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; -public class JsonPathTest { +class JsonPathTest { String json = "{\t\"dataInfo\":{\t\t\"invisible\":false,\t\t\"inferred\":false,\t\t\"deletedbyinference\":false,\t\t\"trust\":\"0.810000002384185791\",\t\t\"inferenceprovenance\":\"\",\t\t\"provenanceaction\":{\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t}\t},\t\"lastupdatetimestamp\":1584960968152,\t\"id\":\"20|corda__h2020::9faf23721249f26ac2c16eb857ea1fb9\",\t\"originalId\":[\t\t\"corda__h2020::927957582\"\t],\t\"collectedfrom\":[\t\t{\t\t\t\"key\":\"openaire____::corda_h2020\",\t\t\t\"value\":\"CORDA - COmmon Research DAta Warehouse - Horizon 2020\",\t\t\t\"dataInfo\":null\t\t}\t],\t\"pid\":[\t],\t\"dateofcollection\":\"2016-06-05\",\t\"dateoftransformation\":\"2019-11-19\",\t\"extraInfo\":[\t],\t\"oaiprovenance\":null,\t\"legalshortname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"legalname\":{\t\t\"value\":\"Comentor 
AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"alternativeNames\":[\t],\t\"websiteurl\":{\t\t\"value\":\"http://www.comentor.se\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"logourl\":null,\t\"eclegalbody\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"eclegalperson\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnonprofit\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecresearchorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"echighereducation\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganizationeurinterests\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":f
alse,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecenterprise\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecsmevalidated\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnutscode\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"country\":null}"; DedupConfig conf = DedupConfig @@ -283,15 +285,18 @@ public class JsonPathTest { + "}"); @Test - public void testJPath() throws Exception { + void testJPath() { MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(conf, json); + Assertions.assertNotNull(d); + Assertions.assertTrue(StringUtils.isNotBlank(d.getIdentifier())); + System.out.println("d = " + d); } @Test - public void testNull() throws Exception { + void testNull() { final Object p = null; System.out.println((String) p); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt new file mode 100644 index 000000000..862ca466d --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt @@ -0,0 +1,2 @@ +50|r37b0ad08687::f645b9729d1e1025a72c57883f0f2cac####50|r37b0ad08687::4c55b436743b5c49fa32cd582fd9e1aa +50|datacite____::a90f49f9fde5393c00633bea6e4e374a####50|datacite____::5f55cdee77303ba8a2bf9996c32a330c \ No newline at end of file diff --git 
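Note on the whitelist fixture added just above: each line of whitelist.simrels.txt carries a pair of entity identifiers joined by the same "####" separator that SparkDedupTest declares as WHITELIST_SEPARATOR. A minimal, hypothetical Java sketch of reading that format into source/target id pairs (class and method names are illustrative only, not part of the patch) could look like this:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

public class WhitelistReaderSketch {

    // same separator used by whitelistSimRelsTest above
    private static final String WHITELIST_SEPARATOR = "####";

    /** Reads "source####target" lines into two-element id arrays, skipping malformed lines. */
    public static List<String[]> readPairs(String path) throws IOException {
        List<String[]> pairs = new ArrayList<>();
        for (String line : Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8)) {
            String[] ids = line.split(WHITELIST_SEPARATOR);
            if (ids.length == 2) {
                pairs.add(ids); // ids[0] = source id, ids[1] = target id
            }
        }
        return pairs;
    }
}
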
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index e68880433..efd5d2497 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -231,7 +231,7 @@ object DoiBoostMappingUtil { var hb = new KeyValue if (item != null) { hb.setValue(item.officialname) - hb.setKey(generateDSId(item.id)) + hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index b77de13b9..e501b4823 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -13,10 +13,30 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString,JArray} +import org.json4s.jackson.JsonMethods.parse object SparkGenerateDoiBoost { + def extractIdGRID(input:String):List[(String,String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + + val id:String = (json \ "id").extract[String] + + val grids:List[String] = for { + + JObject(pid) <- json \ "pid" + JField("qualifier", JObject(qualifier)) <- pid + JField("classid", JString(classid)) <-qualifier + JField("value", JString(vl)) <- pid + if classid == "GRID" + } yield vl + grids.map(g => (id, s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) + } + def main(args: Array[String]): Unit = { @@ -36,6 +56,7 @@ object SparkGenerateDoiBoost { val hostedByMapPath = parser.get("hostedByMapPath") val workingDirPath = parser.get("workingPath") + val openaireOrganizationPath = parser.get("openaireOrganizationPath") val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { override def zero: Publication = new Publication @@ -156,7 +177,7 @@ object SparkGenerateDoiBoost { magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { val pub:Publication = item._1._2 val affiliation = item._2 - val affId:String = if (affiliation.GridId.isDefined) DoiBoostMappingUtil.generateGridAffiliationId(affiliation.GridId.get) else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val affId:String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) val r:Relation = new Relation r.setSource(pub.getId) r.setTarget(affId) @@ -174,9 +195,35 @@ object SparkGenerateDoiBoost { r1.setDataInfo(pub.getDataInfo) r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) List(r, r1) - })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") + })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + + + val 
unresolvedRels:Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { + + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else if (r.getTarget.startsWith("unresolved")) + (r.getTarget,r) + else + ("resolved", r) + }) + + val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) + + unresolvedRels.joinWith(openaireOrganization,unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + .map { x => + val currentRels = x._1._2 + val currentOrgs = x._2 + if (currentOrgs!= null) + if(currentRels.getSource.startsWith("unresolved")) + currentRels.setSource(currentOrgs._1) + else + currentRels.setTarget(currentOrgs._1) + currentRels + }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 if (affiliation.GridId.isEmpty) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java index ee6136b58..c6c207727 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref; import java.io.ByteArrayOutputStream; +import java.util.Objects; import java.util.Optional; import java.util.zip.Inflater; @@ -22,9 +23,11 @@ public class CrossrefImporter { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - CrossrefImporter.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/import_from_es.json"))); + Objects + .requireNonNull( + CrossrefImporter.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/import_from_es.json")))); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java index dcebbbcac..6d6a4ca78 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref; +import java.io.IOException; import java.util.Iterator; import java.util.List; @@ -11,8 +12,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; @@ -57,8 +56,8 @@ public class ESClient implements Iterator { try (CloseableHttpResponse response = client.execute(httpPost)) { return IOUtils.toString(response.getEntity().getContent()); } - } catch (Throwable e) { - throw new RuntimeException("Error on executing request ", e); + } catch (IOException e) { + throw new IllegalStateException("Error on executing request ", e); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7cae1fcb..d1861ff0a 100644 
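The ExtractCrossrefRecords hunk below changes how the HDFS location is assembled: the working path is now prefixed with the server URI once, and both fs.defaultFS and the tar.gz read path are derived from that single fully qualified path. A small sketch of the same path-handling pattern, assuming only a standard Hadoop client on the classpath (names here are illustrative):

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsReadSketch {

    /** Builds a fully qualified working path (serverUri + relative path) and opens a file under it. */
    public static InputStream open(String hdfsServerUri, String relativeWorkingPath, String fileName)
        throws IOException {
        // e.g. "hdfs://namenode:8020" + "/data/doiboost/crossref" -> one fully qualified base path
        String workingPath = hdfsServerUri.concat(relativeWorkingPath);
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", workingPath);
        FileSystem fs = FileSystem.get(URI.create(workingPath), conf);
        return fs.open(new Path(workingPath.concat("/").concat(fileName)));
    }
}
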
--- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -26,19 +26,19 @@ public class ExtractCrossrefRecords { .toString( ExtractCrossrefRecords.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); - final String workingPath = parser.get("workingPath"); + final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(hdfsServerUri.concat(crossrefFileNameTarGz)); + Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); + conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); + FileSystem fs = FileSystem.get(URI.create(workingPath), conf); FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); try (TarArchiveInputStream tais = new TarArchiveInputStream( new GzipCompressorInputStream(crossrefFileStream))) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index feb540fcd..f725b3222 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -29,8 +29,10 @@ public class ActivitiesDecompressor { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; - public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) - throws Exception { + private ActivitiesDecompressor() { + } + + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws IOException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); @@ -44,7 +46,7 @@ public class ActivitiesDecompressor { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,8 +54,7 @@ public class ActivitiesDecompressor { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int doiFound = 0; int errorFromOrcidFound = 0; @@ -79,11 +80,11 @@ public class ActivitiesDecompressor { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = 
new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes()); + WorkData workData = XMLRecordParser.VTDParseWorkData(builder.toString().getBytes()); if (workData != null) { if (workData.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -113,7 +114,7 @@ public class ActivitiesDecompressor { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } @@ -177,11 +178,11 @@ public class ActivitiesDecompressor { counter++; BufferedReader br = new BufferedReader(new InputStreamReader(tais)); String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - String xml = buffer.toString(); + String xml = builder.toString(); String[] filenameParts = filename.split("/"); final Text key = new Text( XMLRecordParser diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java index 4de4a0266..99587b16a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,7 +11,6 @@ import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; public class ExtractXMLActivitiesData extends OrcidDSManager { private String outputWorksPath; @@ -22,11 +22,11 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { extractXMLActivitiesData.extractWorks(); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLActivitiesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); @@ -43,7 +43,6 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { private void extractWorks() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java index 5c2a35229..4121f3391 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -5,12 +5,13 @@ import java.io.IOException; import org.apache.commons.io.IOUtils; import 
org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; +import eu.dnetlib.dhp.parser.utility.VtdException; public class ExtractXMLSummariesData extends OrcidDSManager { @@ -27,7 +28,7 @@ public class ExtractXMLSummariesData extends OrcidDSManager { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLSummariesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); @@ -42,9 +43,8 @@ public class ExtractXMLSummariesData extends OrcidDSManager { Log.info("Output Authors Data: " + outputAuthorsPath); } - public void extractAuthors() throws Exception { + public void extractAuthors() throws IOException, VtdException, ParseException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 3b4033450..7e4869fdd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -22,9 +22,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); } - public void generateAuthorsDOIsData() throws Exception { + public void generateAuthorsDOIsData() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 73a4bfd05..0b4ef279d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -25,9 +25,8 @@ public class OrcidDSManager { orcidDSManager.generateAuthors(); } - public void generateAuthors() throws Exception { + public void generateAuthors() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8cf070213..c549beb03 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.FileNotFoundException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -11,10 +12,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -24,8 +21,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import scala.Tuple2; public class SparkDownloadOrcidAuthors { @@ -64,25 +65,21 @@ public class SparkDownloadOrcidAuthors { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); - logger.info("Retrieving data from lamda sequence file"); + String lambdaFilePath = workingPath + lambdaFileName; + logger.info("Retrieving data from lamda sequence file: " + lambdaFilePath); JavaPairRDD lamdaFileRDD = sc - .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); + .sequenceFile(lambdaFilePath, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); - logger.info("Data retrieved: " + lamdaFileRDDCount); + logger.info("Data retrieved: {}", lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); @@ -95,48 +92,50 @@ public class SparkDownloadOrcidAuthors { return false; }; - Function, Tuple2> 
downloadRecordFunction = data -> { + Function, Tuple2> downloadRecordFn = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastModifiedDate); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl = "https://api.orcid.org/v3.0/" + orcidId + "/record"; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + boolean downloadCompleted = false; + String record = ""; + try { + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); + } + errorsAcc.add(1); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - case 404: - errorHTTP404Acc.add(1); - case 409: - errorHTTP409Acc.add(1); - case 503: - errorHTTP503Acc.add(1); - case 525: - errorHTTP525Acc.add(1); - default: - errorHTTPGenericAcc.add(1); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return downloaded.toTuple2(); }; @@ -145,31 +144,30 @@ public class SparkDownloadOrcidAuthors { logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); long authorsModifiedCount = authorsModifiedRDD.count(); - logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Authors modified count: {}", authorsModifiedCount); - logger.info("Start downloading ..."); - authorsModifiedRDD + final JavaPairRDD pairRDD = authorsModifiedRDD .repartition(100) - .map(downloadRecordFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) - .saveAsNewAPIHadoopFile( - workingPath.concat(outputPath), - Text.class, - Text.class, - SequenceFileOutputFormat.class, - sc.hadoopConfiguration()); + .map(downloadRecordFn) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); + saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); - logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); - 
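The rewritten download function above keeps the original pacing logic: the HTTP call is timed and, if it finished in under a second, the thread sleeps for the remainder so requests to the ORCID API stay spaced roughly one second apart within each task; the same pattern reappears in SparkDownloadOrcidWorks further below. A self-contained sketch of that pattern (the wrapper class is hypothetical, only the timing logic mirrors the patch):

import java.util.concurrent.Callable;

public final class OneCallPerSecondSketch {

    private OneCallPerSecondSketch() {
    }

    /** Runs the request and, if it completed in under a second, sleeps for the remaining time. */
    public static <T> T paced(Callable<T> request) throws Exception {
        long start = System.currentTimeMillis();
        try {
            return request.call();
        } finally {
            long elapsed = System.currentTimeMillis() - start;
            if (elapsed < 1000) {
                Thread.sleep(1000 - elapsed);
            }
        }
    }
}
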
logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); + logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); + } + private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, + JavaPairRDD pairRDD) { + pairRDD + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); } public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 59de7ca80..0569bacfd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; -import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,11 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -31,8 +24,12 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import scala.Tuple2; @@ -61,7 +58,7 @@ public class SparkDownloadOrcidWorks { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); final String token = parser.get("token"); 
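In both SparkDownloadOrcidAuthors above and SparkDownloadOrcidWorks below, the per-HTTP-status accumulators are collapsed into a single errors counter and, when a download fails, the record's status code is taken from the accompanying DownloadsReport: the first reported error code if one was collected, otherwise the sentinel -4. Schematically, with a plain Map standing in for DownloadsReport (an assumption for illustration, not the project's actual type):

import java.util.Map;

public final class ErrorStatusSketch {

    private ErrorStatusSketch() {
    }

    /** First reported HTTP error code, or -4 when the report carries no code at all. */
    public static int statusCodeFrom(Map<Integer, String> report) {
        return report.keySet()
            .stream()
            .findFirst()
            .orElse(-4);
    }
}
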
final String hdfsServerUri = parser.get("hdfsServerUri"); @@ -93,12 +90,7 @@ public class SparkDownloadOrcidWorks { .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -152,46 +144,44 @@ public class SparkDownloadOrcidWorks { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastUpdateValue); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + relativeWorkUrl); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + boolean downloadCompleted = false; + String record = ""; + try { + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); + } + errorsAcc.add(1); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - case 404: - errorHTTP404Acc.add(1); - case 409: - errorHTTP409Acc.add(1); - case 503: - errorHTTP503Acc.add(1); - case 525: - errorHTTP525Acc.add(1); - default: - errorHTTPGenericAcc.add(1); - logger - .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return 
downloaded.toTuple2(); }; @@ -199,24 +189,20 @@ public class SparkDownloadOrcidWorks { .flatMap(retrieveWorkUrlFunction) .repartition(100) .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); - logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); - logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); - logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); - logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); - logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); - logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); - logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); - logger.info("errorLoadingXMLFoundAcc: " + errorLoadingXMLFoundAcc.value().toString()); - logger.info("errorParsingXMLFoundAcc: " + errorParsingXMLFoundAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); + logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); + logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); + logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); + logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); + logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); + logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); + logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); + logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index 178d07608..d2fed61ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -3,7 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.*; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -15,7 +16,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -28,10 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import 
eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { - private static String hdfsServerUri; - private static String workingPath; - private static String outputPath; - private static String lambdaFileName; public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -45,12 +41,14 @@ public class SparkGenLastModifiedSeq { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - hdfsServerUri = parser.get("hdfsServerUri"); - workingPath = parser.get("workingPath"); - outputPath = parser.get("outputPath"); - lambdaFileName = parser.get("lambdaFileName"); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; + + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = parser.get("workingPath"); + final String outputPath = parser.get("outputPath"); + final String lambdaFileName = parser.get("lambdaFileName"); + + final String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + final String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -64,7 +62,7 @@ public class SparkGenLastModifiedSeq { .concat(workingPath) .concat(outputPath)); Path hdfsreadpath = new Path(lambdaFileUri); - Configuration conf = new Configuration(); + Configuration conf = spark.sparkContext().hadoopConfiguration(); conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 7d9f39d05..3ecb4f5e3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,13 +52,13 @@ public class SparkGenerateDoiAuthorList { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); - logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + logger.info("outputDoiAuthorListPath: {}", outputDoiAuthorListPath); final String authorsPath = parser.get("authorsPath"); - logger.info("authorsPath: ", authorsPath); + logger.info("authorsPath: {}", authorsPath); final String xmlWorksPath = parser.get("xmlWorksPath"); - logger.info("xmlWorksPath: ", xmlWorksPath); + logger.info("xmlWorksPath: {}", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -128,8 +127,7 @@ public class SparkGenerateDoiAuthorList { .concat( d1.stream(), d2.stream()); - List mergedAuthors = 
mergedStream.collect(Collectors.toList()); - return mergedAuthors; + return mergedStream.collect(Collectors.toList()); } if (d1 != null) { return d1; @@ -170,14 +168,6 @@ public class SparkGenerateDoiAuthorList { return authorData; } - private static WorkData loadWorkFromJson(Text orcidId, Text json) { - WorkData workData = new WorkData(); - workData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - workData.setDoi(getJsonValue(jElement, "doi")); - return workData; - } - private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { JsonElement name = null; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java index 51326c610..1727f1825 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; -import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -17,8 +16,10 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,11 +36,12 @@ import scala.Tuple2; public class SparkUpdateOrcidAuthors { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +55,6 @@ public class SparkUpdateOrcidAuthors { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -87,7 +88,7 @@ public class SparkUpdateOrcidAuthors { String jsonData = data._2().toString(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { @@ -135,7 +136,7 @@ public class SparkUpdateOrcidAuthors { .col("authorData.oid") .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), "full_outer") - .map(value -> { + .map((MapFunction, AuthorSummary>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -183,7 +184,8 @@ public class SparkUpdateOrcidAuthors { .groupBy("authorData.oid") 
.agg(array_max(collect_list("downloadDate"))) .map( - row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()), + (MapFunction>) row -> new Tuple2<>(row.get(0).toString(), + row.get(1).toString()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .toJavaRDD() .collect(); @@ -214,25 +216,24 @@ public class SparkUpdateOrcidAuthors { .dropDuplicates("downloadDate", "authorData"); cleanedDS .toJavaRDD() - .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); long cleanedDSCount = cleanedDS.count(); - logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); - logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); - logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); - logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); - logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); - logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - logger.info("report_merged_count: " + mergedCount); - logger.info("report_cleaned_count: " + cleanedDSCount); + logger.info("report_oldAuthorsFoundAcc: {}", oldAuthorsFoundAcc.value()); + logger.info("report_newAuthorsFoundAcc: {}", newAuthorsFoundAcc.value()); + logger.info("report_updatedAuthorsFoundAcc: {}", updatedAuthorsFoundAcc.value()); + logger.info("report_errorCodeFoundAcc: {}", errorCodeAuthorsFoundAcc.value()); + logger.info("report_errorLoadingJsonFoundAcc: {}", errorLoadingAuthorsJsonFoundAcc.value()); + logger.info("report_errorParsingXMLFoundAcc: {}", errorParsingAuthorsXMLFoundAcc.value()); + logger.info("report_merged_count: {}", mergedCount); + logger.info("report_cleaned_count: {}", cleanedDSCount); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java index fa17e97e3..aad202ff6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java @@ -3,17 +3,16 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -26,20 +25,19 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; import scala.Tuple2; public class SparkUpdateOrcidDatasets { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +51,6 @@ public class SparkUpdateOrcidDatasets { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,25 +59,6 @@ public class SparkUpdateOrcidDatasets { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator oldAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("old_authors_found"); - LongAccumulator updatedAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("updated_authors_found"); - LongAccumulator newAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("new_authors_found"); - LongAccumulator errorCodeAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("error_code_authors_found"); - LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_authors_json_found"); - LongAccumulator errorParsingAuthorsXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_parsing_authors_xml_found"); - LongAccumulator oldWorksFoundAcc = spark .sparkContext() .longAccumulator("old_works_found"); @@ -100,125 +78,11 @@ public class SparkUpdateOrcidDatasets { .sparkContext() .longAccumulator("error_parsing_works_xml_found"); -// JavaPairRDD xmlSummariesRDD = sc -// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); -// xmlSummariesRDD -// .map(seq -> { -// AuthorSummary authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(seq._2().toString().getBytes()); -// authorSummary -// .setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return authorSummary; -// }) -// .filter(authorSummary -> authorSummary != null) -// .map(authorSummary -> JsonWriter.create(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); -// -// JavaPairRDD xmlWorksRDD = sc -// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); -// -// xmlWorksRDD -// .map(seq -> { -// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); -// Work work = new Work(); -// work.setWorkDetail(workDetail); -// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return work; -// }) -// .filter(work -> work != null) -// .map(work -> JsonWriter.create(work)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); - -// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { -// AuthorSummary authorSummary = new AuthorSummary(); -// String orcidId = data._1().toString(); -// String jsonData 
= data._2().toString(); -// JsonElement jElement = new JsonParser().parse(jsonData); -// String statusCode = getJsonValue(jElement, "statusCode"); -// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); -// if (statusCode.equals("200")) { -// String compressedData = getJsonValue(jElement, "compressedData"); -// if (StringUtils.isEmpty(compressedData)) { -// errorLoadingAuthorsJsonFoundAcc.add(1); -// } else { -// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); -// try { -// authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(xmlAuthor.getBytes()); -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// authorSummary.setBase64CompressData(compressedData); -// return authorSummary; -// } catch (Exception e) { -// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); -// errorParsingAuthorsXMLFoundAcc.add(1); -// } -// } -// } else { -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// errorCodeAuthorsFoundAcc.add(1); -// } -// return authorSummary; -// }; -// -// Dataset downloadedAuthorSummaryDS = spark -// .createDataset( -// sc -// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) -// .map(retrieveAuthorSummaryFunction) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// Dataset currentAuthorSummaryDS = spark -// .createDataset( -// sc -// .textFile(workingPath.concat("orcid_dataset/authors/*")) -// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// currentAuthorSummaryDS -// .joinWith( -// downloadedAuthorSummaryDS, -// currentAuthorSummaryDS -// .col("authorData.oid") -// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), -// "full_outer") -// .map(value -> { -// Optional opCurrent = Optional.ofNullable(value._1()); -// Optional opDownloaded = Optional.ofNullable(value._2()); -// if (!opCurrent.isPresent()) { -// newAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// if (!opDownloaded.isPresent()) { -// oldAuthorsFoundAcc.add(1); -// return opCurrent.get(); -// } -// if (opCurrent.isPresent() && opDownloaded.isPresent()) { -// updatedAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// return null; -// }, -// Encoders.bean(AuthorSummary.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); -// -// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); -// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); -// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); -// logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); -// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); -// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - - Function retrieveWorkFunction = jsonData -> { + final Function retrieveWorkFunction = jsonData -> { Work work = new Work(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate("2020-11-18 00:00:05.644768"); if 
(statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -247,9 +111,7 @@ public class SparkUpdateOrcidDatasets { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -271,7 +133,7 @@ public class SparkUpdateOrcidDatasets { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -291,23 +153,22 @@ public class SparkUpdateOrcidDatasets { Encoders.bean(Work.class)) .filter(Objects::nonNull) .toJavaRDD() - .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java index 5ebbc01ed..64523941d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -29,14 +30,16 @@ import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; public class SparkUpdateOrcidWorks { + public static final Logger logger = 
LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -83,7 +86,6 @@ public class SparkUpdateOrcidWorks { JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate(Long.toString(System.currentTimeMillis())); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -112,9 +114,7 @@ public class SparkUpdateOrcidWorks { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -136,7 +136,7 @@ public class SparkUpdateOrcidWorks { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -159,12 +159,12 @@ public class SparkUpdateOrcidWorks { .map(work -> OBJECT_MAPPER.writeValueAsString(work)) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); String lastModifiedDateFromLambdaFile = HDFSUtil .readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt"); @@ -175,8 +175,7 @@ public class SparkUpdateOrcidWorks { private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index c85b5b691..af6def227 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -20,6 +20,9 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -28,22 +31,23 @@ public class SummariesDecompressor { private static final int MAX_XML_RECORDS_PARSED = -1; - public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) - throws Exception { - String uri = inputUri; - FileSystem fs = FileSystem.get(URI.create(uri), conf); - Path inputPath = new Path(uri); + private SummariesDecompressor() { + } + + public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws IOException { + FileSystem fs = FileSystem.get(URI.create(inputUri), conf); + Path inputPath = new Path(inputUri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { - System.err.println("No codec found for " + uri); + System.err.println("No codec found for " + inputUri); System.exit(1); } - CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + CompressionCodecFactory.removeSuffix(inputUri, codec.getDefaultExtension()); InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarSummaries(fs, conf, gzipInputStream, outputPath); + parseTarSummaries(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,7 +56,7 @@ public class SummariesDecompressor { } private static void parseTarSummaries( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int nameFound = 0; int surnameFound = 0; @@ -163,7 +167,7 @@ public class SummariesDecompressor { } public static void extractXML(Configuration conf, String inputUri, Path outputPath) - throws Exception { + throws IOException, VtdException, ParseException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index a2342f7b4..9eb73b240 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; public class JsonHelper { + private JsonHelper() { + } + public static String createOidWork(WorkDetail workData) { return new Gson().toJson(workData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java new file mode 100644 index 000000000..b06b0af90 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java @@ -0,0 +1,10 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import java.util.LinkedHashMap; + +public class DownloadsReport extends 
LinkedHashMap { + + public DownloadsReport() { + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java index e1a913476..bbd2e1f7e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java @@ -2,10 +2,8 @@ package eu.dnetlib.doiboost.orcid.util; import java.io.*; -import java.net.URI; import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,42 +12,39 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - -import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors; - public class HDFSUtil { static Logger logger = LoggerFactory.getLogger(HDFSUtil.class); + private HDFSUtil() { + } + private static FileSystem getFileSystem(String hdfsServerUri) throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsServerUri); - FileSystem fileSystem = FileSystem.get(conf); - return fileSystem; + return FileSystem.get(conf); } public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException { FileSystem fileSystem = getFileSystem(hdfsServerUri); Path toReadPath = new Path(workingPath.concat(path)); if (!fileSystem.exists(toReadPath)) { - throw new RuntimeException("File not exist: " + path); + throw new IOException("File not exist: " + path); } - logger.info("Last_update_path " + toReadPath); + logger.info("Last_update_path {}", toReadPath); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); - BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); - StringBuffer sb = new StringBuffer(); - try { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { + StringBuilder sb = new StringBuilder(); + String line; while ((line = br.readLine()) != null) { sb.append(line); } - } finally { - br.close(); + + String buffer = sb.toString(); + logger.info("Last_update: {}", buffer); + return buffer; } - String buffer = sb.toString(); - logger.info("Last_update: " + buffer); - return buffer; } public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text) @@ -60,8 +55,8 @@ public class HDFSUtil { fileSystem.delete(toWritePath, true); } FSDataOutputStream os = fileSystem.create(toWritePath); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); - br.write(text); - br.close(); + try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8))) { + br.write(text); + } } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java new file mode 100644 index 000000000..5ef6efa26 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java @@ -0,0 +1,272 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.io.InputStream; +import java.net.*; 
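
For orientation, the MultiAttemptsHttpConnector added in this part of the patch retries HTTP GET requests with backoff and records every failed attempt in the DownloadsReport introduced just above (a LinkedHashMap keyed by HTTP status code, as far as its usage in this patch shows). Below is a minimal usage sketch, not taken from the workflow itself: the URL, token and Accept header are placeholders, and the report's generic types are an assumption since only the raw map is visible here.

    import eu.dnetlib.dhp.common.collection.CollectorException;
    import eu.dnetlib.doiboost.orcid.util.DownloadsReport;
    import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector;

    public class MultiAttemptsDownloadSketch {
        public static void main(String[] args) {
            MultiAttemptsHttpConnector connector = new MultiAttemptsHttpConnector();
            connector.setAuthMethod(MultiAttemptsHttpConnector.BEARER);
            connector.setAuthToken("example-access-token");              // placeholder token
            connector.setAcceptHeaderValue("application/vnd.orcid+xml"); // assumed media type

            DownloadsReport report = new DownloadsReport();
            try {
                // one logical download; retries, redirects and Retry-After handling
                // happen inside the connector
                String record = connector
                    .getInputSource("https://api.example.org/v3.0/0000-0000-0000-0000/record", report);
                System.out.println("downloaded " + record.length() + " characters");
            } catch (CollectorException e) {
                // each failed attempt was accumulated in the report, keyed by status code
                report.forEach((code, message) -> System.err.println(code + " -> " + message));
            }
        }
    }

The sketch only illustrates the report-driven error-handling contract; the actual callers and their configuration live elsewhere in the workflow and are not shown in this section.
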
+import java.util.List; +import java.util.Map; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.http.HttpHeaders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +/** + * Derived from eu.dnetlib.dhp.common.collection.HttpConnector2 with custom report and Bearer auth + * + * @author enrico + */ +public class MultiAttemptsHttpConnector { + + private static final Logger log = LoggerFactory.getLogger(MultiAttemptsHttpConnector.class); + + private HttpClientParams clientParams; + + private String responseType = null; + + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + + private String authToken = ""; + private String acceptHeaderValue = ""; + private String authMethod = ""; + public final static String BEARER = "BEARER"; + + public MultiAttemptsHttpConnector() { + this(new HttpClientParams()); + } + + public MultiAttemptsHttpConnector(HttpClientParams clientParams) { + this.clientParams = clientParams; + CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @param report the list of errors + * @return the content of the downloaded resource + * @throws CollectorException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl, DownloadsReport report) + throws CollectorException { + return attemptDownloadAsString(requestUrl, 1, report); + } + + private String attemptDownloadAsString(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException { + + try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) { + return IOUtils.toString(s); + } catch (IOException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private InputStream attemptDownload(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException, IOException { + + if (retryNumber > getClientParams().getMaxNumberOfRetry()) { + final String msg = String + .format( + "Max number of retries (%s/%s) exceeded, failing.", + retryNumber, getClientParams().getMaxNumberOfRetry()); + log.error(msg); + throw new CollectorException(msg); + } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + + InputStream input = null; + + try { + if (getClientParams().getRequestDelay() > 0) { + backoffAndSleep(getClientParams().getRequestDelay()); + } + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); + urlConn.setInstanceFollowRedirects(false); + urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); + urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000); + urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent); + + if (!getAcceptHeaderValue().isEmpty()) { + urlConn.addRequestProperty(HttpHeaders.ACCEPT, getAcceptHeaderValue()); + } + if (!getAuthToken().isEmpty() && getAuthMethod().equals(BEARER)) { + urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken())); + } + + if (log.isDebugEnabled()) { + logHeaderFields(urlConn); + } + + int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + if (is2xx(urlConn.getResponseCode())) { + input = urlConn.getInputStream(); + 
responseType = urlConn.getContentType(); + return input; + } + if (is3xx(urlConn.getResponseCode())) { + // REDIRECTS + final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); + log.info("The requested url has been moved to {}", newUrl); + report + .put( + urlConn.getResponseCode(), + String.format("Moved to: %s", newUrl)); + urlConn.disconnect(); + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } + return attemptDownload(newUrl, retryNumber + 1, report); + } + if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) { + switch (urlConn.getResponseCode()) { + case HttpURLConnection.HTTP_NOT_FOUND: + case HttpURLConnection.HTTP_BAD_GATEWAY: + case HttpURLConnection.HTTP_UNAVAILABLE: + case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: + if (retryAfter > 0) { + log + .warn( + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); + backoffAndSleep(retryAfter * 1000); + } else { + log + .warn( + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); + backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + } + report.put(urlConn.getResponseCode(), requestUrl); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + default: + report + .put( + urlConn.getResponseCode(), + String + .format( + "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + throw new CollectorException(urlConn.getResponseCode() + " error " + report); + } + } + throw new CollectorException( + String + .format( + "Unexpected status code: %s errors: %s", urlConn.getResponseCode(), + MAPPER.writeValueAsString(report))); + } catch (MalformedURLException | UnknownHostException e) { + log.error(e.getMessage(), e); + report.put(-2, e.getMessage()); + throw new CollectorException(e.getMessage(), e); + } catch (SocketTimeoutException | SocketException e) { + log.error(e.getMessage(), e); + report.put(-3, e.getMessage()); + backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); + return attemptDownload(requestUrl, retryNumber + 1, report); + } + } + + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { + log.debug("StatusCode: {}", urlConn.getResponseMessage()); + + for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { + if (e.getKey() != null) { + for (String v : e.getValue()) { + log.debug(" key: {} - value: {}", e.getKey(), v); + } + } + } + } + + private void backoffAndSleep(int sleepTimeMs) throws CollectorException { + log.info("I'm going to sleep for {}ms", sleepTimeMs); + try { + Thread.sleep(sleepTimeMs); + } catch (InterruptedException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private int obtainRetryAfter(final Map> headerMap) { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) + && NumberUtils.isCreatable(headerMap.get(key).get(0))) { + return Integer.parseInt(headerMap.get(key).get(0)) + 10; + } + } + return -1; + } + + private String obtainNewLocation(final Map> headerMap) throws CollectorException { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) { + return headerMap.get(key).get(0); + } + } + throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING"); + } + + private boolean is2xx(final int 
statusCode) { + return statusCode >= 200 && statusCode <= 299; + } + + private boolean is4xx(final int statusCode) { + return statusCode >= 400 && statusCode <= 499; + } + + private boolean is3xx(final int statusCode) { + return statusCode >= 300 && statusCode <= 399; + } + + private boolean is5xx(final int statusCode) { + return statusCode >= 500 && statusCode <= 599; + } + + public String getResponseType() { + return responseType; + } + + public HttpClientParams getClientParams() { + return clientParams; + } + + public void setClientParams(HttpClientParams clientParams) { + this.clientParams = clientParams; + } + + public void setAuthToken(String authToken) { + this.authToken = authToken; + } + + private String getAuthToken() { + return authToken; + } + + public String getAcceptHeaderValue() { + return acceptHeaderValue; + } + + public void setAcceptHeaderValue(String acceptHeaderValue) { + this.acceptHeaderValue = acceptHeaderValue; + } + + public String getAuthMethod() { + return authMethod; + } + + public void setAuthMethod(String authMethod) { + this.authMethod = authMethod; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 52e076105..e8745aa96 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,6 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.io.IOException; import java.util.*; import org.apache.commons.lang3.StringUtils; @@ -38,6 +37,9 @@ public class XMLRecordParser { private static final String NS_ERROR = "error"; + private XMLRecordParser() { + } + public static AuthorData VTDParseAuthorData(byte[] bytes) throws VtdException, ParseException { final VTDGen vg = new VTDGen(); @@ -90,46 +92,6 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } -// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); -// if (StringUtils.isNoneBlank(creationMethod)) { -// authorData.setCreationMethod(creationMethod); -// } -// -// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); -// if (StringUtils.isNoneBlank(completionDate)) { -// authorData.setCompletionDate(completionDate); -// } -// -// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); -// if (StringUtils.isNoneBlank(submissionDate)) { -// authorData.setSubmissionDate(submissionDate); -// } -// -// final String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); -// if (StringUtils.isNoneBlank(claimed)) { -// authorData.setClaimed(Boolean.parseBoolean(claimed)); -// } -// -// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); -// if (StringUtils.isNoneBlank(verifiedEmail)) { -// authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); -// } -// -// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); -// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { -// authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); -// } -// -// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); -// if (StringUtils.isNoneBlank(deactivationDate)) { -// 
authorData.setDeactivationDate(deactivationDate); -// } -// -// final String lastModifiedDate = VtdUtilityParser -// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); -// if (StringUtils.isNoneBlank(lastModifiedDate)) { -// authorData.setLastModifiedDate(lastModifiedDate); -// } return authorData; } @@ -207,7 +169,7 @@ public class XMLRecordParser { } public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) - throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + throws ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -251,15 +213,15 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); - AuthorData authorData = retrieveAuthorData(ap, vn, bytes); - AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorData authorData = retrieveAuthorData(ap, vn); + AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn); AuthorSummary authorSummary = new AuthorSummary(); authorSummary.setAuthorData(authorData); authorSummary.setAuthorHistory(authorHistory); return authorSummary; } - private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn) throws VtdException { AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -300,7 +262,7 @@ public class XMLRecordParser { return authorData; } - private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn) throws VtdException { AuthorHistory authorHistory = new AuthorHistory(); final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 124a1b9ef..ddbf71bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -33,6 +33,9 @@ public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private ActivitiesDumpReader() { + } + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws Exception { String uri = inputUri; @@ -48,7 +51,7 @@ public class ActivitiesDumpReader { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -56,8 +59,7 @@ public class ActivitiesDumpReader { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int noDoiFound = 0; int errorFromOrcidFound = 0; @@ -73,7 +75,7 @@ public class ActivitiesDumpReader { 
SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,12 +85,12 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - buffer = new StringBuffer(); + builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } WorkDetail workDetail = XMLRecordParserNoDoi - .VTDParseWorkData(buffer.toString().getBytes()); + .VTDParseWorkData(builder.toString().getBytes()); if (workDetail != null) { if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -123,7 +125,7 @@ public class ActivitiesDumpReader { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 4a64124d1..5c23a33a8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; @@ -16,7 +16,6 @@ import eu.dnetlib.doiboost.orcid.OrcidDSManager; * This job generates one sequence file, the key is an orcid identifier and the * value is an orcid publication in json format */ - public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; @@ -30,13 +29,12 @@ public class GenOrcidAuthorWork extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 1d47808ef..db3b14923 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Arrays; 
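
A pattern repeated across these Spark jobs (SparkUpdateOrcidAuthors, SparkUpdateOrcidWorks, and SparkGenEnrichedOrcidWorks below) is casting the lambda passed to Dataset.map to an explicit MapFunction so that the typed overload taking an Encoder is selected unambiguously. The following self-contained sketch shows that idiom with a hypothetical Person bean standing in for the ORCID domain classes; it is an illustration of the technique, not code from this PR.

    import java.io.Serializable;
    import java.util.Arrays;

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.SparkSession;

    public class TypedMapFunctionSketch {

        // hypothetical bean, plays the role of AuthorSummary/Work in the patch
        public static class Person implements Serializable {
            private String name;
            public Person() {}
            public Person(String name) { this.name = name; }
            public String getName() { return name; }
            public void setName(String name) { this.name = name; }
        }

        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("typed-map-sketch")
                .getOrCreate();

            Dataset<Person> people = spark
                .createDataset(Arrays.asList(new Person("ada"), new Person("alan")),
                    Encoders.bean(Person.class));

            // the explicit MapFunction cast picks the typed Dataset.map overload,
            // mirroring the casts added throughout this patch
            Dataset<String> names = people
                .map((MapFunction<Person, String>) Person::getName, Encoders.STRING());

            names.show(false);
            spark.stop();
        }
    }

The same explicit-cast trick is what the MapFunction additions around the joinWith results in the hunks above and below achieve for the author and work datasets.
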
import java.util.List; import java.util.Objects; @@ -14,21 +14,16 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; @@ -75,7 +70,7 @@ public class SparkGenEnrichedOrcidWorks { spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -86,10 +81,10 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) .filter(authorSummary -> authorSummary.getAuthorData() != null) - .map(authorSummary -> authorSummary.getAuthorData()) + .map(AuthorSummary::getAuthorData) .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + authorDataset.count()); + logger.info("Authors data loaded: {}", authorDataset.count()); Dataset workDataset = spark .createDataset( @@ -97,7 +92,7 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) .filter(work -> work.getWorkDetail() != null) - .map(work -> work.getWorkDetail()) + .map(Work::getWorkDetail) .filter(work -> work.getErrorCode() == null) .filter( work -> work @@ -107,7 +102,7 @@ public class SparkGenEnrichedOrcidWorks { .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) .rdd(), Encoders.bean(WorkDetail.class)); - logger.info("Works data loaded: " + workDataset.count()); + logger.info("Works data loaded: {}", workDataset.count()); final LongAccumulator warnNotFoundContributors = spark .sparkContext() @@ -122,7 +117,7 @@ public class SparkGenEnrichedOrcidWorks { WorkDetail w = value._1; AuthorData a = value._2; if (w.getContributors() == null - || (w.getContributors() != null && w.getContributors().size() == 0)) { + || (w.getContributors() != null && w.getContributors().isEmpty())) { Contributor c = new Contributor(); c.setName(a.getName()); c.setSurname(a.getSurname()); @@ -163,7 +158,6 @@ public class SparkGenEnrichedOrcidWorks { final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, - errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, @@ -172,13 +166,10 @@ public class SparkGenEnrichedOrcidWorks { titleNotProvidedAcc, noUrlAcc, dateOfCollection); + JavaRDD oafPublicationRDD = enrichedWorksRDD - .map( - e -> { - return (Publication) publicationToOaf - 
.generatePublicationActionsFromJson(e._2()); - }) - .filter(p -> p != null); + .map(e -> (Publication) publicationToOaf.generatePublicationActionsFromJson(e._2())) + .filter(Objects::nonNull); sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); @@ -186,7 +177,7 @@ public class SparkGenEnrichedOrcidWorks { .mapToPair( p -> new Tuple2<>(p.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( outputEnrichedWorksPath, Text.class, @@ -194,17 +185,17 @@ public class SparkGenEnrichedOrcidWorks { SequenceFileOutputFormat.class, sc.hadoopConfiguration()); - logger.info("parsedPublications: " + parsedPublications.value().toString()); - logger.info("enrichedPublications: " + enrichedPublications.value().toString()); - logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); - logger.info("errorsGeneric: " + errorsGeneric.value().toString()); - logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); - logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); - logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); - logger.info("otherTypeFound: " + otherTypeFound.value().toString()); - logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); - logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); - logger.info("noUrlAcc: " + noUrlAcc.value().toString()); + logger.info("parsedPublications: {}", parsedPublications.value()); + logger.info("enrichedPublications: {}", enrichedPublications.value()); + logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value()); + logger.info("errorsGeneric: {}", errorsGeneric.value()); + logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value()); + logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value()); + logger.info("errorsInvalidType: {}", errorsInvalidType.value()); + logger.info("otherTypeFound: {}", otherTypeFound.value()); + logger.info("deactivatedAcc: {}", deactivatedAcc.value()); + logger.info("titleNotProvidedAcc: {}", titleNotProvidedAcc.value()); + logger.info("noUrlAcc: {}", noUrlAcc.value()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 23e9dd884..33f3b3bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -18,6 +18,9 @@ public class JsonWriter { public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); + private JsonWriter() { + } + public static String create(AuthorData authorData) throws JsonProcessingException { return OBJECT_MAPPER.writeValueAsString(authorData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 215753899..f92040c24 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -26,21 +27,19 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ - public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public final static String orcidPREFIX = "orcid_______"; + public static final String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; + public static final String SEPARATOR = IdentifierFactory.ID_SEPARATOR; public static final String DEACTIVATED_NAME = "Given Names Deactivated"; public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; - private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; @@ -52,7 +51,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf( LongAccumulator parsedPublications, LongAccumulator enrichedPublications, - LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, @@ -63,7 +61,6 @@ public class PublicationToOaf implements Serializable { String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; - this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; @@ -77,7 +74,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf() { this.parsedPublications = null; this.enrichedPublications = null; - this.errorsGeneric = null; this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; @@ -88,19 +84,8 @@ public class PublicationToOaf implements Serializable { this.dateOfCollection = null; } - private static final Map> datasources = new HashMap>() { - - { - put( - ModelConstants.ORCID, - new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); - - } - }; - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname private static final Map> externalIds = new HashMap>() { - { put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); @@ -208,10 +193,8 @@ public class PublicationToOaf implements Serializable { .setTitle( titles .stream() - .map(t -> { - return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); - }) - .filter(s -> s != null) + .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ 
-226,7 +209,7 @@ public class PublicationToOaf implements Serializable { publication.setId(sourceId); // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + settingRelevantDate(rootElement, publication, "issued", true); // Adding collectedfrom publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); @@ -243,7 +226,7 @@ public class PublicationToOaf implements Serializable { Map publicationType = typologiesMapping.get(type); if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { errorsInvalidType.add(1); - logger.error("publication_type_not_found: " + type); + logger.error("publication_type_not_found: {}", type); return null; } @@ -307,7 +290,7 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { + if (authors != null && !authors.isEmpty()) { if (authors.stream().filter(a -> { return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); @@ -322,8 +305,7 @@ public class PublicationToOaf implements Serializable { } else { if (authors == null) { Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(rootElement); - throw new RuntimeException("not_valid_authors: " + json); + throw new RuntimeException("not_valid_authors: " + gson.toJson(rootElement)); } else { if (errorsNotFoundAuthors != null) { errorsNotFoundAuthors.add(1); @@ -434,7 +416,6 @@ public class PublicationToOaf implements Serializable { private void settingRelevantDate(final JsonObject rootElement, final Publication publication, - final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) { @@ -450,10 +431,8 @@ public class PublicationToOaf implements Serializable { Arrays .asList(pubDate) .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .filter(s -> s != null) + .map(r -> mapStructuredProperty(r, q, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); } } @@ -498,7 +477,7 @@ public class PublicationToOaf implements Serializable { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { - logger.error("unknowntype_" + type); + logger.error("unknowntype_{}", type); if (errorsInvalidType != null) { errorsInvalidType.add(1); } @@ -550,7 +529,7 @@ public class PublicationToOaf implements Serializable { } private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | StringUtils.isBlank(value)) { + if (value == null || StringUtils.isBlank(value)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index fff753ff3..e69b496b7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -1,26 +1,14 @@ package eu.dnetlib.doiboost.orcidnodoi.similarity; -import java.io.IOException; import java.text.Normalizer; import java.util.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; - -import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -29,18 +17,16 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; * the match is found (if exist) author informations are used to enrich the * matched contribuotr inside contributors list */ - public class AuthorMatcher { - private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - public static final Double threshold = 0.8; + public static final Double THRESHOLD = 0.8; - public static void match(AuthorData author, List contributors) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + private AuthorMatcher() { + } + public static void match(AuthorData author, List contributors) { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; contributors .stream() .filter(c -> !StringUtils.isBlank(c.getCreditName())) @@ -62,8 +48,8 @@ public class AuthorMatcher { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -73,14 +59,14 @@ public class AuthorMatcher { } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() - .filter(c -> c.isSimpleMatch()) + .filter(Contributor::isSimpleMatch) .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -92,7 +78,7 @@ public class AuthorMatcher { } public static boolean simpleMatchOnOtherNames(String name, List otherNames) { - if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + if (otherNames == null || otherNames.isEmpty()) { return false; } return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; @@ -132,8 +118,7 @@ public class AuthorMatcher { } public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - return score; + return similarityJaroWinkler(nameA, surnameA, nameB, surnameB); } private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { @@ -179,7 +164,7 @@ public class AuthorMatcher { public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() - .filter(c -> c.isBestMatch()) + .filter(Contributor::isBestMatch) .forEach(c -> { c.setName(author.getName()); c.setSurname(author.getSurname()); @@ -206,9 +191,4 
@@ public class AuthorMatcher { } } - private static String toJson(WorkDetail work) { - GsonBuilder builder = new GsonBuilder(); - Gson gson = builder.create(); - return gson.toJson(work); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 29791bbbd..b4c12eed3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -5,9 +5,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; @@ -20,21 +17,10 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser */ - public class XMLRecordParserNoDoi { - private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; private static final String NS_COMMON = "common"; - private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; - private static final String NS_PERSON = "person"; - private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; - private static final String NS_DETAILS = "personal-details"; - private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; - private static final String NS_OTHER = "other-name"; - private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; - private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; private static final String NS_WORK = "work"; @@ -42,6 +28,9 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; + private XMLRecordParserNoDoi() { + } + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, ParseException, XPathParseException, NavException, XPathEvalException { @@ -100,16 +89,16 @@ public class XMLRecordParserNoDoi { workData.setUrls(urls); } - workData.setPublicationDates(getPublicationDates(vg, vn, ap)); - workData.setExtIds(getExternalIds(vg, vn, ap)); - workData.setContributors(getContributors(vg, vn, ap)); + workData.setPublicationDates(getPublicationDates(vn, ap)); + workData.setExtIds(getExternalIds(vn, ap)); + workData.setContributors(getContributors(vn, ap)); return workData; } - private static List getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getPublicationDates(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List publicationDates = new ArrayList(); + List publicationDates = new ArrayList<>(); int yearIndex = 0; ap.selectXPath("//common:publication-date/common:year"); while (ap.evalXPath() != -1) { @@ -142,9 +131,9 @@ public class XMLRecordParserNoDoi { return publicationDates; } - private static List getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getExternalIds(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List extIds = new ArrayList(); + List extIds = new ArrayList<>(); int typeIndex = 0; ap.selectXPath("//common:external-id/common:external-id-type"); while (ap.evalXPath() != -1) { 
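The AuthorMatcher hunks above replace the ad-hoc lambdas with method references and promote the 0.8 similarity cut-off to a THRESHOLD constant. A minimal, self-contained sketch of the same best-match selection pattern — using a hypothetical Candidate class with pre-computed scores in place of the real Contributor and its Jaro-Winkler scoring — could look like this:

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;

public class BestMatchSketch {

	// Hypothetical stand-in for Contributor; only the fields needed for the selection step.
	static class Candidate {
		private final String creditName;
		private final Double score; // assumed pre-computed, e.g. by a Jaro-Winkler similarity

		Candidate(String creditName, Double score) {
			this.creditName = creditName;
			this.score = score;
		}

		Double getScore() {
			return score;
		}

		String getCreditName() {
			return creditName;
		}
	}

	// Same cut-off as AuthorMatcher.THRESHOLD in the hunks above.
	public static final Double THRESHOLD = 0.8;

	public static void main(String[] args) {
		List<Candidate> candidates = Arrays
			.asList(
				new Candidate("Parkhouse, H.", 0.95),
				new Candidate("Dodge, J.", 0.40));

		// Keep only candidates at or above the threshold, then pick the highest score.
		Optional<Candidate> best = candidates
			.stream()
			.filter(c -> c.getScore() >= THRESHOLD)
			.max(Comparator.comparing(Candidate::getScore));

		best.ifPresent(c -> System.out.println("best match: " + c.getCreditName()));
	}
}

Run as-is this prints "best match: Parkhouse, H."; in the refactored AuthorMatcher the same filter/max chain operates on Contributor instances whose score is set by bestMatch(...) immediately before the comparison.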
@@ -177,12 +166,12 @@ public class XMLRecordParserNoDoi { if (typeIndex == valueIndex) { return extIds; } - return new ArrayList(); + return new ArrayList<>(); } - private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getContributors(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List contributors = new ArrayList(); + List contributors = new ArrayList<>(); ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json index 63e080337..d903ebe1d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json @@ -1,21 +1,16 @@ [ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", + {"paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the source path", + "paramRequired": true}, + {"paramName":"m", + "paramLongName":"master", "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file + "paramRequired": true}, + {"paramName":"t", + "paramLongName":"targetPath", + "paramDescription": "the target path", + "paramRequired": true} + +] + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh new file mode 100644 index 000000000..bb7ec0b45 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 506d86a08..4a98a4bb4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -1,13 +1,5 @@ - + - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base 
path - sparkDriverMemory memory for driver process @@ -18,27 +10,94 @@ sparkExecutorCores - 2 number of cores used by single executor + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + crossrefdumptoken + the token for the API dump path + + + - + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} + ${wf:conf('resumeFrom') eq 'Unpack'} + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + ${crossrefdumptoken} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ + --outputPath${crossrefDumpPath}/files/ @@ -48,7 +107,7 @@ yarn-cluster cluster - SparkGenerateCrossrefDataset + SparkUnpackCrossrefEntries eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries dhp-doiboost-${projectVersion}.jar @@ -63,56 +122,14 @@ --masteryarn-cluster --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates + --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json new file mode 100644 index 000000000..9ccb70a9f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json @@ -0,0 +1,39 @@ +[ + + { + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + "paramRequired" : true + }, + { + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true + }, + { + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" : true +}, { + "paramName": "sn", + "paramLongName" : "sheetName", + "paramDescription" : "the name of the sheet in case the file is excel", + "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + 
"paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false +} + + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json index 1ff63dd0e..39455fb67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json @@ -1,7 +1,8 @@ [ {"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}, {"paramName": "hb", "paramLongName":"hostedByMapPath", "paramDescription": "the hosted By Map Path", "paramRequired": true}, + {"paramName": "oo", "paramLongName":"openaireOrganizationPath", "paramDescription": "the openaire Organization Path", "paramRequired": true}, {"paramName": "ap", "paramLongName":"affiliationPath", "paramDescription": "the Affliation Path", "paramRequired": true}, {"paramName": "pa", "paramLongName":"paperAffiliationPath", "paramDescription": "the paperAffiliation Path", "paramRequired": true}, - {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} + {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} ] diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 3485ec01a..ab3b9593e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,12 +63,10 @@ - ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -76,45 +74,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - @@ -138,18 +97,11 @@ --targetPath${inputPathCrossref}/crossref_ds - + - - - - - - - - + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index e61694e5c..eb82c3a7d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -27,6 +27,12 @@ hostedByMapPath the hostedByMap Path + + openaireOrganizationPath + the OpenAire Organizations Path + + + outputPath the Path of the sequence file action set @@ -75,6 +81,7 @@ + ${wf:conf('resumeFrom') eq 'Skip'} ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} ${wf:conf('resumeFrom') eq 'ProcessORCID'} @@ -213,6 +220,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hostedByMapPath${hostedByMapPath} + --openaireOrganizationPath${openaireOrganizationPath} --affiliationPath${inputPathMAG}/dataset/Affiliations --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations --workingPath${workingPath} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 2b241ed5f..d4079058e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,12 +1,11 @@ package eu.dnetlib.doiboost.orcid; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -16,7 +15,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.Lists; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -27,8 +25,11 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParserTest; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidClientTest { @@ -48,7 +49,7 @@ public class OrcidClientTest { @BeforeAll private static void setUp() throws IOException { - testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName()); + testPath = Files.createTempDirectory(OrcidClientTest.class.getName()); System.out.println("using test path: " + testPath); } @@ -57,7 +58,7 @@ public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { + void downloadTest() throws Exception { final String orcid = "0000-0001-7291-3210"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = testPath + 
"/downloaded_record_".concat(orcid).concat(".xml"); @@ -159,18 +160,19 @@ public class OrcidClientTest { } @Test - private void testReadBase64CompressedRecord() throws Exception { + @Disabled + void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertTrue(recordFromSeqFile.equals(downloadedRecord)); + assertEquals(recordFromSeqFile, downloadedRecord); } @Test @Disabled - public void lambdaFileReaderTest() throws Exception { + void lambdaFileReaderTest() throws Exception { String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); @@ -187,7 +189,7 @@ public class OrcidClientTest { while ((line = br.readLine()) != null) { String[] values = line.split(","); List recordInfo = Arrays.asList(values); - assertTrue(recordInfo.size() == 4); + assertEquals(4, recordInfo.size()); String orcid = recordInfo.get(0); String modifiedDate = recordInfo.get(3); rowNum++; @@ -264,7 +266,7 @@ public class OrcidClientTest { } @Test - public void downloadWorkTest() throws Exception { + void downloadWorkTest() throws Exception { String orcid = "0000-0003-0015-1952"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml"); @@ -274,7 +276,7 @@ public class OrcidClientTest { } @Test - public void downloadRecordTest() throws Exception { + void downloadRecordTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); @@ -284,7 +286,7 @@ public class OrcidClientTest { } @Test - public void downloadWorksTest() throws Exception { + void downloadWorksTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); @@ -294,7 +296,7 @@ public class OrcidClientTest { } @Test - public void downloadSingleWorkTest() throws Exception { + void downloadSingleWorkTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); @@ -304,7 +306,7 @@ public class OrcidClientTest { } @Test - public void cleanAuthorListTest() throws Exception { + void cleanAuthorListTest() throws Exception { AuthorData a1 = new AuthorData(); a1.setOid("1"); a1.setName("n1"); @@ -333,7 +335,7 @@ public class OrcidClientTest { @Test @Ignore - public void testUpdatedRecord() throws Exception { + void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -342,9 +344,89 @@ public class OrcidClientTest { @Test @Ignore - private void testUpdatedWork() throws Exception { + void testUpdatedWork() throws Exception { final String base64CompressedWork = 
"H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); } + + @Test + void downloadUnknownHostExceptionTest() throws Exception { + logToFile(testPath, "downloadUnknownHostExceptionTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String wrongApiUrl = "https://api.orcid_UNKNOWN.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(wrongApiUrl, report); + } catch (CollectorException ce) { + logToFile(testPath, "CollectorException downloading: " + ce.getMessage()); + } catch (Throwable t) { + logToFile(testPath, "Throwable downloading: " + t.getMessage()); + } + } + + @Test + void downloadAttemptSuccessTest() throws Exception { + logToFile(testPath, "downloadAttemptSuccessTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report 
= new DownloadsReport(); + String record = httpConnector.getInputSource(apiUrl, report); + logToFile(testPath, "Downloaded at first attempt record: " + record); + } + + @Test + void downloadAttemptNotFoundTest() throws Exception { + logToFile(testPath, "downloadAttemptNotFoundTest"); + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/NOTFOUND/" + REQUEST_TYPE_RECORD; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(apiUrl, report); + } catch (CollectorException ce) { + + } + report.forEach((k, v) -> { + try { + logToFile(testPath, k + " " + v); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } + + @Test + @Ignore + void testDownloadedAuthor() throws Exception { + final String base64CompressedWork = "H4sIAAAAAAAAAI2Yy26jMBSG932KiD0hIe1MiwiVZjGLkWbX2XRHsFOsgs3YJmnefszFFy4+mUhtVPz9P/gcH/vQ9PWrrjYXzAVh9Bjst7tgg2nBEKEfx+DP28/wOdgImVOUV4ziY3DDInjNHlKOC8ZRMnxtmlyWxyDaqU+ofg7h/uX7IYwfn+Ngo25ARUKoxJzm1TEopWySKLper1vGC4LU74+IikgTWoFRW+SyfyyfxCBag4iQhBawyoGMDjdqJrnECJAZRquYLDEPaV5jv8oyWlXj+qTiXZLGr7KMiQbnjAOR6IY1W7C6hgIwjGt6SKGfHsY13ajHYipLIcIyJ5Xw6+akdvjEtyt4wxEwM6+VGph5N2zYr2ENhQRhKsmZYChmS1j7nFs6VIBPOwImKhyfMVeFg6GAWEjrcoQ4FoBmBGwVXYhagGHDBIEX+ZzUDiqyn35VN6rJUpUJ4zc/PAI2T03FbrUKJZQszWjV3zavVOjvVfoE01qB+YUUQPGNwHTt3luxJjdqh1AxJFBKLWOrSeCcF13RtxxYtlPOPqH6m+MLwVfoMQ2kdae2ArLajc6fTxkI1nIoegs0yB426pMO+0fSw07xDKMu0XKSde5C2VvrlVMijRzFwqY7XTJI1QMLWcmEzMxtDdxfHiYSgTNJnYJ1K9y5k0tUrMgrnGGaRiuXxxuClulYUbr0nBvpkYLjvgTCGsuSoex3f1CEvRPHKI184NJKtKeaiO7cD5E61bJ4F+9DFd7d01u8Tw6H5BBvvz8f3q3nXLGIeJULGdaqeVBBRK7rS7h/fNvvk/gpedxt4923dxP7Fc3KtKuc1BhlkrfYmeN4dcmrhmbw60+HmWw2CKgbTuqc32CXKTTmeTWT6bDBjPsQ0DTpnchdaYO0ayQ2FyLIiVREqs25aU8VKYLRbK0BsyZuqvr1MU2Sm/rDdhe/2CRN6FU/b+oBVyj1zqRtC5F8kAumfTclsl+s7EoNQu64nfOaVLeezX60Z3XCULLi6GI2IZGTEeey7fec9lBAuXawIHKcpifE7GABHWfoxLVfpUNPBXoMbZWrHFsR3bPAk9J9i2sw9nW6AQT1mpk++7JhW+v44Hmt8PomJqfD13jRnvFOSxCKtu6qHoyBbQ7cMFo750UEfGaXm6bEeplXIXj2hvL6mA7tzvIwmM9pbJFBG834POZdLGi2gH2u9u0K9HMwn5PTioFWLufzmrS4oNuU9Pkt2rf/2jMs7fMdm2rQTTM+j+49AzToAVuXYA1mD2k0+XdE9vAP+JYR5NcQAAA="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded author \n\n" + work); + } + + @Test + @Ignore + void testDownloadedWork() throws Exception { + final String base64CompressedWork = 
"H4sIAAAAAAAAANVa63LiOBb+z1Oo+LVbhbkGAlTCLE1Id9IhTQV6unr/CVvB2tiWR5Khmal5rX2BfbE9ki3b3Jzt6Y13h6pQSPrOXTo6knL10zffQxvCBWXBdbVVb1YRCWzm0GB9Xf28vLX6VSQkDhzssYBcV3dEVH8aVa62jL8M1RcKI2kBAYwNLnrtXrMPFCGW7nW10YSPBX8dq3XRb1swNGgomkaG3FBBV9SjcnddDaOVR+0qApUCMaSBJDzA3nXVlTIcNhrb7bbOuE0d+F43AtEwCENBnMjGUhtyjiSFGBqHCkkDu5gqB0rpSMgJsCJOAVmKMVRMuoRbAfbJeaoMY6h84q8gQi4Nz1NlmNQbnDNe4Ak1bLA28/0iB8TjBg1GMV5gdzxu0CGoxSBKlkMkpp44T3eINBxeyG5bKDABpJb7QF1guRpOsd/iOWRRhwSSPlNS5LNjsOHzHAXxmjlHmwBSr3DyTDgsNVLkkAxk6LDjcCIKaBJAtoo2FCagFTJBiyf5IdJwUAv2PJUaNUgXlgnju/PgBJDFKfTYzgdXFgXLYAzVLxH2wPWvrfQ9mKEVhG+oXbD4EsD+3H1txqaxgQwBPqRFIc0w2WoSBHNbLfqIF0zbfVymIbQ52VCyLVIzBRm6VeQVRFWNHuoHDASLeJH3jqDVUQXB5yrOH0ObE5UNLQe+R+1mu2U1u1Z7sGy2hq3esN2tt5oXf79qnELv8fGwkJYPmxSswD1uA6vVXrY7w+5g2G3WuxedjNsJmj2escJx33G/ZXsU5iAs/AyRR0WcjpRXBLglc0lM1BjP59bX1qw9Hn/+dH87/dy9vBikeinKkyzVHjoqJNWIk7QuE3KU6pES6O7MwsarJh44QW1KowcWOCxAC9tlzEPsGX3YrYGQICgS0JKzENach2bEoTYNyKEQzaJyQnzSqesKSaV3IhRx92L8tLAm7GerjbZUujSwlFnIobqKkTuth+Q4ED4Vqqypp5JyfK8ah5Ji0f8AZVSGT2TZVGXfBLw/liOyqdRpJqfyXr8ldyEZrehKkm8Jr/2hc3Qb7EVk9DfMJbU98pu3k+6aETXXBebCZpt23tBaBUfSZRxdo98eYmgNfRxrh3zAnldDM/37FvZ+IiWtoQfddgiaEGBIDGCG7btA7jgBP9svAK2h90l4yYqIGop5jgMHXA4J0NB9ksR+YTX0qFtfqACO01jGjDHFPx552AW2W0P3uvGROk4NLfTvCeNS8X9MaDg1rL9Qz6PYh7En3f4ZNmKS6nUfQYFmE6PYe05IYBqPFGaq5wHlYpaoDbYqxokVK+JBerz51z+BIzc+SfSdTHVrTiSYtZzGFNOdGrr5ohsLF2+NUguqppkDoua6/S6yXwAYu44pM+/HiZ1BwEDWMqYbC5fjZ+MEBwMjb4PRLdTFYWrUwiUhJH/H+G3pMl/7fjqJhTGwSwU5lnfLsVDmxIPvmRetbJeCOsvfaxWXbXWxLVziqNky51BLW1OP2JKzgNoASSa7Gk1WAfrLI9mirzBBIUD1r/W/AgrMla7CjEMOzYBJolo30/mnxd0SzadPt5+eZtMb9O7rEN1wNINgEA8Ha+IxNMdrHLCQRR4TFRCudnmB7m6GqD0YDCqW+lQqlfnndw93iw/TJ/RwN5k+TqZDNJkAQyUvUlWvktjrdgbQEeI1EapN8Grd7MOeYJlfajSxWVOMfcIhVQXgfcFsqhcceobVA/U3GjsbDCYrjVSKSz0wHo8Xym6dArRvvjsbAfUGouFr8s5lG9o72DVVSy1saDqMqlarWW+12r2GiIXXMzuAU6AQcLLqWf3mZRf6iOlsNQdda9BudhQnvNNdPWN8XA7BgU5G2k3pLADA75XD3BSnn3y+3M90SbZWGczkxiRVmfSaJrd0V8u0yG3CeYRyht7O07Ste45weuqNmhcpLO44woEPRq1eilLN/f3ntEqGPFfzi2PmudHTO3EOEKf60LdTyUeDr7KIIzKfTfqtdr896JxklQtbES/IQD7UyL+SZIJSXYhLHkHZ9oqEjPR1MRzWu550cDYdCeI9n+S4hzouUU76+UeCQJ0fjkKn0+v3m703i0Eh/z97BCDH/XAAziTIt4rH94j7s4dHbSY/HJ90e3qriBQL+MMxCGETs9j/QxiSQ5PaS63/QsZqdS8vOxdvtj7Oc//fL4dTI2LvDAfVA6erSDKe3+cPxw70j4c5HHZlfLT9iAEZYKjZkxOYKZxymJy659l/t+QZllC5bvVJrzShD5GN0/NkiaZyqNcJh0NrdngtTfp7wviaHB+SS1Ng7O+Sk3h5HodT4S8RyY78pUmGM6eEg1l8tVCa1KnvY/SgrzDKsxRLF46j+uahNKH3BE6lsIb1lUxpUhdS3WUE+u6nPP/qiyAsklumMhMz9SBNqeus0oQ+QXqwIa7m3qy87IhXnBLPI8kVXXlZMaASm5vAEqWuKYkvHMtbPdiPiIdm6dVmeVMZjX+lfnKDWmaRAT7ev6ctTfhEF3RoWnJeXlKfSXcHcsf69rk0wTd4Qx30RV9yl5et2Ipwqe/SS5MJXiU8vbIv2b/qZaC8PZ65AUwj9QJR3vx1mQ9b7VPy1FFebnSpWq7xi0qJuwA+fLYpL7rwJdLXobcSa97kM4Cl35f3YXmofp0+8R9gBc/XeXL9Vn38pH7mLTs27z9T8ky1n7ynlZ0I4le78rYzl6t/woG5krwQlpcRcLDD2UPkH5F73C9G5tFKfY0q/wa1TIHI0CgAAA=="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded work \n\n" + work); + } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 235db52d4..78760fa96 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { - private static final 
String NS_WORK = "work"; - private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; - private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static Path testPath; @@ -38,12 +35,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getName()); @@ -54,12 +49,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + void testOrcidXMLErrorRecordParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getErrorCode()); @@ -67,14 +60,12 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); - XMLRecordParser p = new XMLRecordParser(); - WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes()); assertNotNull(workData); assertNotNull(workData.getOid()); @@ -83,7 +74,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidOtherNamesXMLParser() throws Exception { + void testOrcidOtherNamesXMLParser() throws Exception { String xml = IOUtils .toString( @@ -91,30 +82,13 @@ public class XMLRecordParserTest { AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); - assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + assertEquals("Andrew C. 
Porteus", authorData.getOtherNames().get(0)); String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } -// @Test -// private void testWorkIdLastModifiedDateXMLParser() throws Exception { -// String xml = IOUtils -// .toString( -// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); -// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); -// workIdLastModifiedDate.forEach((k, v) -> { -// try { -// OrcidClientTest -// .logToFile( -// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " -// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); -// } catch (IOException e) { -// } -// }); -// } - @Test - public void testAuthorSummaryXMLParser() throws Exception { + void testAuthorSummaryXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); @@ -124,7 +98,7 @@ public class XMLRecordParserTest { } @Test - public void testWorkDataXMLParser() throws Exception { + void testWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 01e26dcb4..54c16b5d7 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static org.junit.jupiter.api.Assertions.*; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,36 +12,36 @@ import org.slf4j.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import jdk.nashorn.internal.ir.annotations.Ignore; -public class PublicationToOafTest { +class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test - @Ignore - private void convertOafPublicationTest() throws Exception { + @Disabled + void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); - logger.info("json publication loaded: " + j.toString()); + logger.info("json publication loaded: {}", j.toString()); PublicationToOaf publicationToOaf = new PublicationToOaf(); Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); - assertEquals(oafPublication.getOriginalId().get(0), "60153327"); - logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals("60153327", oafPublication.getOriginalId().get(0)); + logger.info("oafPublication.getId(): {}", oafPublication.getId()); assertEquals( - oafPublication.getTitle().get(0).getValue(), - "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) 
across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp", + oafPublication.getTitle().get(0).getValue()); assertNotNull(oafPublication.getLastupdatetimestamp()); assertNotNull(oafPublication.getDateofcollection()); assertNotNull(oafPublication.getDateoftransformation()); - assertTrue(oafPublication.getAuthor().size() == 7); + assertEquals(7, oafPublication.getAuthor().size()); oafPublication.getAuthor().forEach(a -> { assertNotNull(a.getFullname()); assertNotNull(a.getRank()); @@ -64,15 +65,15 @@ public class PublicationToOafTest { if (oafPublication.getExternalReference() != null) { oafPublication.getExternalReference().forEach(e -> { assertNotNull(e.getRefidentifier()); - assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + assertEquals(ModelConstants.DNET_PID_TYPES, e.getQualifier().getSchemeid()); }); } assertNotNull(oafPublication.getInstance()); oafPublication.getInstance().forEach(i -> { assertNotNull(i.getInstancetype().getClassid()); - logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid()); assertNotNull(i.getInstancetype().getClassname()); - logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 54c2d6217..99ec656d5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.util.*; @@ -25,7 +24,7 @@ import eu.dnetlib.dhp.schema.orcid.Contributor; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -public class OrcidNoDoiTest { +class OrcidNoDoiTest { private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class); @@ -34,7 +33,7 @@ public class OrcidNoDoiTest { static String orcidIdA = "0000-0003-2760-1191"; @Test - public void readPublicationFieldsTest() + void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils @@ -44,10 +43,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -87,7 +82,7 @@ public class OrcidNoDoiTest { } @Test - public void authorDoubleMatchTest() throws Exception { + void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = 
"activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -98,71 +93,49 @@ public class OrcidNoDoiTest { .toString( OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + assertEquals("Abdel-Dayem K", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 6); + assertEquals(6, workData.getContributors().size()); } @Test - public void readContributorsTest() + void readContributorsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); - assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); - assertTrue(workData.getContributors().get(0).getRole().equals("role0")); - assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertEquals("seq0", workData.getContributors().get(0).getSequence()); + assertEquals("role0", workData.getContributors().get(0).getRole()); + assertEquals("creditname1", workData.getContributors().get(1).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); - assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); - assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertEquals("creditname2", workData.getContributors().get(2).getCreditName()); + assertEquals("seq2", workData.getContributors().get(2).getSequence()); assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); - assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertEquals("creditname3", workData.getContributors().get(3).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); - assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertEquals("role3", workData.getContributors().get(3).getRole()); assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); - 
assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); - assertTrue(workData.getContributors().get(4).getRole().equals("role4")); + assertEquals("seq4", workData.getContributors().get(4).getSequence()); + assertEquals("role4", workData.getContributors().get(4).getRole()); } @Test - public void authorSimpleMatchTest() throws Exception { + void authorSimpleMatchTest() throws Exception { String orcidWork = "activity_work_0000-0002-5982-8983.xml"; AuthorData author = new AuthorData(); author.setName("Parkhouse"); @@ -175,10 +148,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -188,20 +157,21 @@ public class OrcidNoDoiTest { assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Parkhouse, H.")); + assertEquals("Parkhouse, H.", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 2); + assertEquals(2, workData.getContributors().size()); Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0002-5982-8983")); - assertTrue(c.getName().equals("Parkhouse")); - assertTrue(c.getSurname().equals("H.")); - assertTrue(c.getCreditName().equals("Parkhouse, H.")); + + assertEquals("0000-0002-5982-8983", c.getOid()); + assertEquals("Parkhouse", c.getName()); + assertEquals("H.", c.getSurname()); + assertEquals("Parkhouse, H.", c.getCreditName()); } @Test - public void match() { + void match() { AuthorData author = new AuthorData(); author.setName("Joe"); @@ -210,7 +180,6 @@ public class OrcidNoDoiTest { Contributor contributor = new Contributor(); contributor.setCreditName("Joe Dodge"); List contributors = Arrays.asList(contributor); - AuthorMatcher am = new AuthorMatcher(); int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); contributors @@ -225,12 +194,13 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters.get(0) == 1); + assertEquals(1, matchCounters.get(0)); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("Joe Dodge", contributors.get(0).getCreditName()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); AuthorData authorX = new AuthorData(); authorX.setName(nameA); @@ -259,7 +229,7 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters2.get(0) == 2); + assertEquals(2, matchCounters2.get(0)); assertTrue(contributorList.get(0).isSimpleMatch()); assertTrue(contributorList.get(1).isSimpleMatch()); @@ -271,7 +241,7 @@ public class OrcidNoDoiTest { c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .filter(c -> c.getScore() >= AuthorMatcher.THRESHOLD) .max(Comparator.comparing(c -> c.getScore())); assertTrue(optCon.isPresent()); @@ -281,15 +251,16 @@ public class OrcidNoDoiTest 
{ assertTrue(contributorList.get(0).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch()); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX); - assertTrue(contributorList.get(0).getName().equals(nameA)); - assertTrue(contributorList.get(0).getSurname().equals(surnameA)); - assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); - assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + + assertEquals(nameA, contributorList.get(0).getName()); + assertEquals(surnameA, contributorList.get(0).getSurname()); + assertEquals("Abdel-Dayem Khai", contributorList.get(0).getCreditName()); + assertEquals(orcidIdA, contributorList.get(0).getOid()); assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); } @Test - public void authorBestMatchTest() throws Exception { + void authorBestMatchTest() throws Exception { String name = "Khairy"; String surname = "Abdel Dayem"; String orcidWork = "activity_work_0000-0003-2760-1191.xml"; @@ -304,10 +275,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -315,16 +282,17 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); List c = workData.getContributors(); - assertTrue(c.get(0).getName().equals(name)); - assertTrue(c.get(0).getSurname().equals(surname)); - assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); - assertTrue(c.get(0).getOid().equals(orcidIdA)); + + assertEquals(name, c.get(0).getName()); + assertEquals(surname, c.get(0).getSurname()); + assertEquals("Khair Abde Daye", c.get(0).getCreditName()); + assertEquals(orcidIdA, c.get(0).getOid()); } @Test - public void otherNamesMatchTest() + void otherNamesMatchTest() throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { AuthorData author = new AuthorData(); @@ -341,8 +309,9 @@ public class OrcidNoDoiTest { contributor.setCreditName("XY"); List contributors = Arrays.asList(contributor); AuthorMatcher.match(author, contributors); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); } } diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 6f638b188..644ac2140 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -47,7 +47,6 @@ io.github.classgraph classgraph - 4.8.71 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 692605b03..0d7c74475 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -14,12 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PropagationConstant { + + private PropagationConstant() { + } + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; @@ -63,27 +68,28 @@ public class PropagationConstant { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, - PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME)); + PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name) { + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { DataInfo di = new DataInfo(); di.setInferred(true); di.setDeletedbyinference(false); di.setTrust("0.85"); di.setInferenceprovenance(inference_provenance); - di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name)); + di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - pa.setSchemeid(ModelConstants.DNET_PID_TYPES); - pa.setSchemename(ModelConstants.DNET_PID_TYPES); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); return pa; } @@ -102,7 +108,7 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name)); + r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index 0f45d3beb..8e76b5778 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,16 +5,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.gson.Gson; /** Created by miriam on 01/08/2018. 
*/ public class Community implements Serializable { - private static final Log log = LogFactory.getLog(Community.class); - private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 844fe2962..5d92f5ab6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -2,15 +2,9 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; @@ -22,8 +16,6 @@ import eu.dnetlib.dhp.bulktag.criteria.Selection; /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { - private static final Log log = LogFactory.getLog(CommunityConfiguration.class); - private Map communities; // map subject -> communityid @@ -136,7 +128,7 @@ public class CommunityConfiguration implements Serializable { else return null; }) - .filter(st -> (st != null)) + .filter(Objects::nonNull) .collect(Collectors.toList()); } @@ -161,7 +153,7 @@ public class CommunityConfiguration implements Serializable { private List getContextIds(List> list) { if (list != null) { - return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + return list.stream().map(Pair::getFst).collect(Collectors.toList()); } return Lists.newArrayList(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 749ed292f..822d35078 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -13,6 +13,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,11 +32,16 @@ public class CommunityConfigurationFactory { private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration newInstance(final String xml) throws DocumentException { + private CommunityConfigurationFactory() { + } + + public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { log.debug(String.format("parsing community configuration from:\n%s", xml)); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Map communities = Maps.newHashMap(); diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index e0856ae8f..9002e718f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -13,9 +13,6 @@ public class Constraint implements Serializable { private String value; private Selection selection; - public Constraint() { - } - public String getVerb() { return verb; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index b56dfaaa3..0f6fab238 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -19,11 +19,8 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 02/08/2018. */ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); - // private ConstraintEncapsulator ce; - private List constraint; - public Constraints() { - } + private List constraint; public List getConstraint() { return constraint; @@ -44,13 +41,8 @@ public class Constraints implements Serializable { try { st.setSelection(resolver); - } catch (NoSuchMethodException e) { - log.error(e.getMessage()); - } catch (IllegalAccessException e) { - log.error(e.getMessage()); - } catch (InvocationTargetException e) { - log.error(e.getMessage()); - } catch (InstantiationException e) { + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException + | InstantiationException e) { log.error(e.getMessage()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index a9427b594..cb198dc43 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -19,10 +19,6 @@ public class Provider implements Serializable { private SelectionConstraints selectionConstraints; - public SelectionConstraints getSelCriteria() { - return selectionConstraints; - } - public SelectionConstraints getSelectionConstraints() { return selectionConstraints; } @@ -31,10 +27,6 @@ public class Provider implements Serializable { this.selectionConstraints = selectionConstraints; } - public void setSelCriteria(SelectionConstraints selCriteria) { - this.selectionConstraints = selCriteria; - } - public String getOpenaireId() { return openaireId; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 993a7ef77..89cf5beff 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community; import java.util.List; import org.dom4j.DocumentException; +import 
org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -63,7 +64,7 @@ public class QueryInformationSystem { + " "; public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); final List res = isLookUp.quickSearchProfile(XQUERY); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index c9ff26963..c8b1bc8fe 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -43,7 +43,6 @@ public class ResultTagger implements Serializable { param.put(key, jsonContext.read(params.get(key))); } catch (com.jayway.jsonpath.PathNotFoundException e) { param.put(key, new ArrayList<>()); - // throw e; } } return param; @@ -52,9 +51,6 @@ public class ResultTagger implements Serializable { public R enrichContextCriteria( final R result, final CommunityConfiguration conf, final Map criteria) { - // } - // public Result enrichContextCriteria(final Result result, final CommunityConfiguration - // conf, final Map criteria) { final Map> param = getParamMap(result, criteria); // Verify if the entity is deletedbyinference. In case verify if to clean the context list @@ -74,7 +70,7 @@ public class ResultTagger implements Serializable { result .getSubject() .stream() - .map(subject -> subject.getValue()) + .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .map(String::toLowerCase) .map(String::trim) @@ -90,15 +86,11 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { - if (Objects.nonNull(i.getCollectedfrom())) { - if (Objects.nonNull(i.getCollectedfrom().getKey())) { - tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); - } + if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { + tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); } - if (Objects.nonNull(i.getHostedby())) { - if (Objects.nonNull(i.getHostedby().getKey())) { - tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); - } + if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { + tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); } } @@ -149,52 +141,46 @@ public class ResultTagger implements Serializable { return result; } - result - .getContext() - .stream() - .map( - c -> { - if (communities.contains(c.getId())) { - Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); - List dataInfoList; - if (opt_dataInfoList.isPresent()) - dataInfoList = opt_dataInfoList.get(); - else { - dataInfoList = new ArrayList<>(); - c.setDataInfo(dataInfoList); - } - if (subjects.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_SUBJECT, - CLASS_NAME_BULKTAG_SUBJECT, - TAGGING_TRUST)); - if (datasources.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_DATASOURCE, - CLASS_NAME_BULKTAG_DATASOURCE, - TAGGING_TRUST)); - if (czenodo.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - 
BULKTAG_DATA_INFO_TYPE, - CLASS_ID_CZENODO, - CLASS_NAME_BULKTAG_ZENODO, - TAGGING_TRUST)); - } - return c; - }) - .collect(Collectors.toList()); + result.getContext().forEach(c -> { + if (communities.contains(c.getId())) { + Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); + List dataInfoList; + if (opt_dataInfoList.isPresent()) + dataInfoList = opt_dataInfoList.get(); + else { + dataInfoList = new ArrayList<>(); + c.setDataInfo(dataInfoList); + } + if (subjects.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_SUBJECT, + CLASS_NAME_BULKTAG_SUBJECT, + TAGGING_TRUST)); + if (datasources.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_DATASOURCE, + CLASS_NAME_BULKTAG_DATASOURCE, + TAGGING_TRUST)); + if (czenodo.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_CZENODO, + CLASS_NAME_BULKTAG_ZENODO, + TAGGING_TRUST)); + } + }); communities .removeAll( - result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet())); + result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) return result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 71ff61d1b..c7dcce812 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -15,9 +15,6 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; public class SelectionConstraints implements Serializable { private List criteria; - public SelectionConstraints() { - } - public List getCriteria() { return criteria; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 80d98bb1a..8274e26c9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.community; public class TaggingConstants { + private TaggingConstants() { + } + public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; public static final String CLASS_ID_SUBJECT = "community:subject"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index bc6b75fba..54c2dc9aa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -31,7 +31,6 @@ public class ZenodoCommunity implements Serializable { } private void setSelCriteria(String json) { - // Type collectionType = new TypeToken>(){}.getType(); selCriteria = new Gson().fromJson(json, SelectionConstraints.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index 
ec9fb716d..9129e6e54 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.bulktag.criteria; -public interface Selection { +import java.io.Serializable; + +public interface Selection extends Serializable { boolean apply(String value); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 54176efb6..459ac1ba9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -7,16 +7,16 @@ import java.util.Map; import java.util.stream.Collectors; import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; public class VerbResolver implements Serializable { - private Map> map = null; // = new HashMap<>(); - private final ClassGraph classgraph = new ClassGraph(); + + private Map> map = null; public VerbResolver() { + final ClassGraph classgraph = new ClassGraph(); try (ScanResult scanResult = // Assign scanResult in try-with-resources classgraph // Create a new ClassGraph instance .verbose() // If you want to enable logging to stderr @@ -41,8 +41,6 @@ public class VerbResolver implements Serializable { .get(0) .getValue(), value -> (Class) value.loadClass())); - } catch (Exception e) { - e.printStackTrace(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 0bb801999..446ad5fbc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { + private VerbResolverFactory() { + } + public static VerbResolver newInstance() { return new VerbResolver(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 04a659a1c..ddc7f93f7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -6,11 +6,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,11 +17,11 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - 
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -77,16 +76,16 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = " d.id = '" + whitelist.get(0) + "'"; - for (int i = 1; i < whitelist.size(); i++) { - whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; - } - String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + final String whitelisted = whitelist + .stream() + .map(id -> " d.id = '" + id + "'") + .collect(Collectors.joining(" OR ")); - for (int i = 1; i < allowedtypes.size(); i++) { - allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; - } + final String allowed = allowedtypes + .stream() + .map(type -> " d.datasourcetype.classid = '" + type + "'") + .collect(Collectors.joining(" OR ")); Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8d0d6c48b..77f7288f6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -85,13 +85,12 @@ public class PrepareResultCountrySet { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); - // log.info("number of results: {}", result.count()); + createCfHbforResult(spark); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); datasource_country.createOrReplaceTempView("datasource_country"); - // log.info("datasource_country number : {}", datasource_country.count()); spark .sql(RESULT_COUNTRYSET_QUERY) @@ -102,7 +101,7 @@ public class PrepareResultCountrySet { ArrayList countryList = a.getCountrySet(); Set countryCodes = countryList .stream() - .map(country -> country.getClassid()) + .map(CountrySbs::getClassid) .collect(Collectors.toSet()); b .getCountrySet() @@ -119,10 +118,6 @@ public class PrepareResultCountrySet { }) .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .option("compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 97e0a33e1..4aa48583f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +19,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -28,8 +29,6 @@ public class SparkCountryPropagationJob { private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -90,7 +89,6 @@ public class SparkCountryPropagationJob { boolean saveGraph) { if (saveGraph) { - // updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath); log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); @@ -122,7 +120,7 @@ public class SparkCountryPropagationJob { private static List merge(List c1, List c2) { HashSet countries = c1 .stream() - .map(c -> c.getClassid()) + .map(Qualifier::getClassid) .collect(Collectors.toCollection(HashSet::new)); return c2 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index b3ef3a112..95b870292 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -8,9 +8,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,7 +16,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 2cea32e58..c60012a74 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -87,7 +87,7 @@ public class PrepareResultOrcidAssociationStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 436a53cbe..68949b900 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -18,7 +18,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -141,34 +140,31 @@ public class SparkOrcidToResultFromSemRelJob { author_surname = author.getSurname(); } if (StringUtils.isNotEmpty(author_surname)) { + // have the same surname. Check the name if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); + .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author_name.trim())) { + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. 
only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } @@ -177,13 +173,14 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 27ff727fd..ac61e26f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -2,30 +2,25 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.getConstraintList; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index c57abb451..1ec521af1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -5,28 +5,27 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import 
org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; public class SparkResultToProjectThroughSemRelJob { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(SparkResultToProjectThroughSemRelJob.class); public static void main(String[] args) throws Exception { @@ -95,26 +94,21 @@ public class SparkResultToProjectThroughSemRelJob { private static FlatMapFunction, Relation> mapRelationRn() { return value -> { - List new_relations = new ArrayList<>(); - ResultProjectSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - if (already_linked.isPresent()) { - already_linked - .get() - .getProjectSet() - .stream() - .forEach( - (p -> { - potential_update.getProjectSet().remove(p); - })); - } - String resId = potential_update.getResultId(); - potential_update + List newRelations = new ArrayList<>(); + ResultProjectSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + alreadyLinked + .ifPresent( + resultProjectSet -> resultProjectSet + .getProjectSet() + .forEach( + (p -> potentialUpdate.getProjectSet().remove(p)))); + String resId = potentialUpdate.getResultId(); + potentialUpdate .getProjectSet() - .stream() .forEach( projectId -> { - new_relations + newRelations .add( getRelation( resId, @@ -125,7 +119,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - new_relations + newRelations .add( getRelation( projectId, @@ -137,7 +131,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index a5f84cd2f..1a008797d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -10,11 +10,12 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -109,10 +110,6 @@ public class PrepareResultCommunitySet { }) .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .json(outputPath); } private static MapFunction mapResultCommunityFn( @@ -131,7 +128,7 @@ public class PrepareResultCommunitySet { communitySet.addAll(organizationMap.get(oId)); } } - if (communitySet.size() > 0) { + if (!communitySet.isEmpty()) { ResultCommunityList rcl = new ResultCommunityList(); rcl.setResultId(rId); ArrayList communityList = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 7201a30f6..1289ff644 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -4,9 +4,13 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -17,10 +21,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -59,6 +62,7 @@ public class SparkResultToCommunityFromOrganizationJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -106,9 +110,12 @@ public class SparkResultToCommunityFromOrganizationJob { List contextList = ret .getContext() .stream() - .map(con -> con.getId()) + .map(Context::getId) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R res = (R) ret.getClass().newInstance(); + res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { @@ -122,7 +129,8 @@ public class SparkResultToCommunityFromOrganizationJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 09340369d..0ddb19a1a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -11,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +90,7 @@ public class PrepareResultCommunitySetStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4cb241ef2..7f76ead94 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -62,6 +63,7 @@ public class SparkResultToCommunityThroughSemRelJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); runWithSparkHiveSession( @@ -105,15 +107,15 @@ public class SparkResultToCommunityThroughSemRelJob { R ret = value._1(); Optional rcl = Optional.ofNullable(value._2()); if (rcl.isPresent()) { - Set context_set = new HashSet<>(); - ret.getContext().stream().forEach(c -> context_set.add(c.getId())); + Set contexts = new HashSet<>(); + ret.getContext().forEach(c -> contexts.add(c.getId())); List contextList = rcl .get() .getCommunityList() .stream() .map( c -> { - if (!context_set.contains(c)) { + if (!contexts.contains(c)) { Context newContext = new Context(); newContext.setId(c); newContext @@ -123,14 +125,18 @@ public class SparkResultToCommunityThroughSemRelJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; }) .filter(Objects::nonNull) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R r = (R) ret.getClass().newInstance(); + r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 3cf36e572..0ef5ca181 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; @@ -91,13 +92,11 @@ public class PrepareResultInstRepoAssociation { private static void prepareDatasourceOrganization( SparkSession spark, String datasourceOrganizationPath, List blacklist) { - String blacklisted = ""; - if (blacklist.size() > 0) { - blacklisted = " AND id != '" + blacklist.get(0) + "'"; - for (int i = 1; i < blacklist.size(); i++) { - blacklisted += " AND id != '" + blacklist.get(i) + "'"; - } - } + + final String blacklisted = blacklist + .stream() + .map(s -> " AND id != '" + s + "'") + .collect(Collectors.joining()); String query = "SELECT source datasourceId, target organizationId " + "FROM ( SELECT id " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 01d7b85e4..63824f1a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -4,23 +4,24 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToOrganizationFromIstRepoJob { @@ -84,7 +85,6 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -105,9 +105,9 @@ public class SparkResultToOrganizationFromIstRepoJob { String outputPath, Class clazz) { - Dataset ds_org = readPath(spark, datasourceorganization, DatasourceOrganization.class); + Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, ds_org); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); 
Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); @@ -125,26 +125,20 @@ public class SparkResultToOrganizationFromIstRepoJob { private static FlatMapFunction, Relation> createRelationFn() { return value -> { - List new_relations = new ArrayList<>(); - ResultOrganizationSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - List organization_list = potential_update.getOrganizationSet(); - if (already_linked.isPresent()) { - already_linked - .get() - .getOrganizationSet() - .stream() - .forEach( - rId -> { - organization_list.remove(rId); - }); - } - String resultId = potential_update.getResultId(); - organization_list - .stream() + List newRelations = new ArrayList<>(); + ResultOrganizationSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getOrganizationSet(); + alreadyLinked + .ifPresent( + resOrg -> resOrg + .getOrganizationSet() + .forEach(organizations::remove)); + String resultId = potentialUpdate.getResultId(); + organizations .forEach( orgId -> { - new_relations + newRelations .add( getRelation( orgId, @@ -155,7 +149,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - new_relations + newRelations .add( getRelation( resultId, @@ -167,7 +161,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } @@ -175,13 +169,13 @@ public class SparkResultToOrganizationFromIstRepoJob { SparkSession spark, String inputPath, Class resultClazz, - Dataset ds_org) { + Dataset dsOrg) { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); createCfHbforResult(spark); - ds_org.createOrReplaceTempView("rels"); + dsOrg.createOrReplaceTempView("rels"); return spark .sql(RESULT_ORGANIZATIONSET_QUERY) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 72e0a63fa..07299f01a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -90,7 +90,7 @@ public class BulkTagJobTest { } @Test - public void noUpdatesTest() throws Exception { + void noUpdatesTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -128,7 +128,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectNoPreviousContextTest() throws Exception { + void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getPath(); @@ -224,7 +224,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") @@ -306,7 +306,7 @@ public class BulkTagJobTest { } @Test - public void 
bulktagByDatasourceTest() throws Exception { + void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getPath(); @@ -378,7 +378,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByZenodoCommunityTest() throws Exception { + void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") @@ -500,7 +500,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceTest() throws Exception { + void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getPath(); @@ -628,7 +628,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { SparkBulkTagJob .main( @@ -724,7 +724,7 @@ public class BulkTagJobTest { } @Test - public void bulktagDatasourcewithConstraintsTest() throws Exception { + void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() .getResource( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index ca737b79f..861546adb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.google.gson.Gson; @@ -20,12 +21,12 @@ import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. 
*/ -public class CommunityConfigurationFactoryTest { +class CommunityConfigurationFactoryTest { private final VerbResolver resolver = new VerbResolver(); @Test - public void parseTest() throws DocumentException, IOException { + void parseTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() @@ -39,7 +40,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void applyVerb() + void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { Constraint sc = new Constraint(); @@ -52,7 +53,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void loadSelCriteriaTest() throws DocumentException, IOException { + void loadSelCriteriaTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 88ad43b6b..963ee5529 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -67,7 +67,7 @@ public class CountryPropagationJobTest { } @Test - public void testCountryPropagationSoftware() throws Exception { + void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") .getPath(); @@ -105,7 +105,7 @@ public class CountryPropagationJobTest { Dataset countryExploded = verificationDs .flatMap( (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); + .map((MapFunction) Qualifier::getClassid, Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -119,10 +119,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassid = verificationDs .flatMap((FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -180,10 +179,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -241,10 +239,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova 
= new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 238375197..85db7ecf9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -65,7 +65,7 @@ public class OrcidPropagationJobTest { } @Test - public void noUpdateTest() throws Exception { + void noUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") .getPath(); @@ -111,7 +111,7 @@ public class OrcidPropagationJobTest { } @Test - public void oneUpdateTest() throws Exception { + void oneUpdateTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { @@ -178,7 +178,7 @@ public class OrcidPropagationJobTest { } @Test - public void twoUpdatesTest() throws Exception { + void twoUpdatesTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index abed028e1..2fe1bc574 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -69,7 +69,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() .getResource( @@ -106,7 +106,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateTenTest() throws Exception { + void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -178,7 +178,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index d739516fc..4dd8b976c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityFromOrganizationJob() throws Exception { + void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") 
.getPath(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 7709e00a8..0d5b12c80 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityThroughSemRelJob() throws Exception { + void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index cfcccc5f0..fdcb10fb9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -67,7 +67,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -110,7 +110,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void UpdateNoMixTest() throws Exception { + void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -176,7 +176,7 @@ public class ResultToOrganizationJobTest { } @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 19febb9ed..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -123,6 +123,7 @@ json4s-jackson_2.11 + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index f06059dd5..7a3583289 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -12,8 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; -public class CleaningRuleMap extends HashMap> implements Serializable { +public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { /** * Creates the mapping for the Oaf types subject to cleaning @@ -24,17 +25,31 @@ public class CleaningRuleMap extends 
HashMap CleaningRuleMap mapping = new CleaningRuleMap(); mapping.put(Qualifier.class, o -> cleanQualifier(vocabularies, (Qualifier) o)); mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o)); - mapping.put(Country.class, o -> { - final Country c = (Country) o; - if (StringUtils.isBlank(c.getSchemeid())) { - c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); - c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); - } - cleanQualifier(vocabularies, c); - }); + mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); + mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); return mapping; } + private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType()); + r.setSubRelType(newValue.getClassid()); + } + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass()); + r.setRelClass(newValue.getClassid()); + } + } + + private static void cleanCountry(VocabularyGroup vocabularies, Country o) { + final Country c = o; + if (StringUtils.isBlank(c.getSchemeid())) { + c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); + c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); + } + cleanQualifier(vocabularies, c); + } + private static void cleanQualifier(VocabularyGroup vocabularies, Q q) { if (vocabularies.vocabularyExists(q.getSchemeid())) { Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 9ba153ba5..5502fd391 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -16,7 +16,7 @@ public class OafCleaner implements Serializable { try { navigate(oaf, mapping); } catch (IllegalAccessException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } return oaf; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 00ddcb5a8..0d2df11fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -1,22 +1,21 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; +import java.io.IOException; +import java.io.Serializable; import java.util.Optional; -import org.apache.commons.compress.archivers.ar.ArArchiveEntry; -import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
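The new cleanRelation and cleanCountry helpers above rewrite a value only when the corresponding vocabulary actually exists. A rough plain-Java illustration of that lookup-then-overwrite idea, using an ordinary Map as a hypothetical stand-in for VocabularyGroup; the vocabulary name and terms below are made up, and the miss case simply keeps the original value:

import java.util.HashMap;
import java.util.Map;

public class RelationCleaningSketch {

	// hypothetical stand-in: vocabulary name -> (raw term -> normalised term)
	private static final Map<String, Map<String, String>> VOCABULARIES = new HashMap<>();

	static {
		Map<String, String> subRelTypes = new HashMap<>();
		subRelTypes.put("outcome", "outcome");
		subRelTypes.put("Outcome", "outcome"); // a synonym being normalised
		VOCABULARIES.put("relation_subreltype", subRelTypes);
	}

	static String clean(String vocabulary, String value) {
		// mirror the guard in cleanRelation: only rewrite when the vocabulary is known
		if (VOCABULARIES.containsKey(vocabulary)) {
			return VOCABULARIES.get(vocabulary).getOrDefault(value, value);
		}
		return value;
	}

	public static void main(String[] args) {
		System.out.println(clean("relation_subreltype", "Outcome")); // prints: outcome
		System.out.println(clean("unknown_vocabulary", "Outcome")); // left untouched
	}
}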
eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.MakeTarArchive; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class MakeTar implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accba..dc740e811 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -8,6 +8,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -29,7 +30,7 @@ public class QueryInformationSystem { ""; public CommunityMap getCommunityMap() - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { return getMap(isLookUp.quickSearchProfile(XQUERY)); } @@ -42,12 +43,14 @@ public class QueryInformationSystem { this.isLookUp = isLookUpService; } - private CommunityMap getMap(List communityMap) throws DocumentException { + private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { final CommunityMap map = new CommunityMap(); for (String xml : communityMap) { final Document doc; - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c..500fe5986 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -93,7 +93,7 @@ public class ResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -109,20 +109,20 @@ public class ResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -132,7 +132,8 @@ public class ResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats 
-> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -219,11 +220,12 @@ public class ResultMapper implements Serializable { if (oInst.isPresent()) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) - .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( - oInst.get().stream().map(i -> getCommunityInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); } } @@ -422,7 +424,7 @@ public class ResultMapper implements Serializable { Optional .ofNullable(i.getInstancetype()) .ifPresent(value -> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518..f86f6918f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -31,25 +32,23 @@ public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private final QueryInformationSystem queryInformationSystem; - private final Configuration conf; private final BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { - conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath); + fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } public static void main(String[] args) throws Exception { @@ -74,10 +73,9 @@ public class SaveCommunityMap implements Serializable { final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); scm.saveCommunityMap(); - } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { + private void saveCommunityMap() throws ISLookUpException, IOException, 
DocumentException, SAXException { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); writer.close(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544..ba26b708a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -17,9 +17,9 @@ import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class SendToZenodoHDFS implements Serializable { - private final static String NEW = "new"; // to be used for a brand new deposition in zenodo - private final static String VERSION = "version"; // to be used to upload a new version of a published deposition - private final static String UPDATE = "update"; // to upload content to an open deposition not published + private static final String NEW = "new"; // to be used for a brand new deposition in zenodo + private static final String VERSION = "version"; // to be used to upload a new version of a published deposition + private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); @@ -85,7 +85,6 @@ public class SendToZenodoHDFS implements Serializable { Path p = fileStatus.getPath(); String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { - // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); log.info("Sending information for community: " + name); if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { @@ -102,9 +101,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) + if (publish) { zenodoApiClient.publish(); - + } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 984e8b128..8e75e9d92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -25,6 +25,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Utils() { + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } @@ -57,7 +60,7 @@ public class Utils { public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath)))); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); try { String line; while ((line = br.readLine()) != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e95..b92eb3e60 100644 --- 
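SaveCommunityMap above now deletes the target path recursively and builds its writer straight from FileSystem.create, dropping the dangling FSDataOutputStream field. A compact sketch of that write pattern, assuming a default Hadoop Configuration (which falls back to the local file system) and a throw-away /tmp path chosen for the example:

import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsWriterSketch {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration(); // without fs.defaultFS this resolves to the local file system
		FileSystem fs = FileSystem.get(conf);

		Path out = new Path("/tmp/communitymap.json");
		if (fs.exists(out)) {
			fs.delete(out, true); // the recursive flag replaces the deprecated single-argument delete
		}

		FSDataOutputStream fos = fs.create(out);
		try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
			writer.write("{\"egi\":\"EGI Federation\"}");
		}
	}
}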
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -48,36 +49,34 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .stream() .forEach(c -> printResult(c, result, outputPath)); } - private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result - .filter((FilterFunction) r -> containsCommunity(r, c)); + private static void printResult(String community, Dataset result, String outputPath) { + Dataset communityProducts = result + .filter((FilterFunction) r -> containsCommunity(r, community)); try { - community_products.first(); - community_products + communityProducts.first(); + communityProducts .write() .option("compression", "gzip") .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - } catch (Exception e) { - + .json(outputPath + "/" + community); + } catch (NoSuchElementException e) { + // ignoring it on purpose } - } - private static boolean containsCommunity(CommunityResult r, String c) { + private static boolean containsCommunity(CommunityResult r, String community) { if (Optional.ofNullable(r.getContext()).isPresent()) { - return r + return !r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) + .filter(con -> con.getCode().equals(community)) .collect(Collectors.toList()) - .size() > 0; + .isEmpty(); } return false; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index 63970d14b..7ab8a7540 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -2,9 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; -import java.util.*; - -import javax.swing.text.html.Option; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b4..0f2b4c2ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -22,6 +22,7 @@ import org.dom4j.Node; import 
org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -127,11 +129,11 @@ public class SparkPrepareResultProject implements Serializable { op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) - .map(a -> a.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getTitle()) - .map(v -> v.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getFundingtree()) @@ -140,7 +142,7 @@ public class SparkPrepareResultProject implements Serializable { .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); - if (tmp.size() > 0) { + if (!tmp.isEmpty()) { return tmp.get(0); } else { return null; @@ -161,30 +163,23 @@ public class SparkPrepareResultProject implements Serializable { } private static Funder getFunder(String fundingtree) { - // ["nsf_________::NSFNSFNational Science - // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision - // of Computing and Communication FoundationsDivision of Computing and Communication - // Foundationsnsf_________::NSF::CISE/OADDirectorate for - // Computer & Information Science & EngineeringDirectorate for Computer & - // Information Science & - // Engineeringnsf:fundingStream"] - Funder f = new Funder(); + final Funder f = new Funder(); final Document doc; try { - doc = new SAXReader().read(new StringReader(fundingtree)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(fundingtree)); f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { List node = ((Node) o).selectNodes("./name"); f.setFundingStream(((Node) node.get(0)).getText()); - } return f; - } catch (DocumentException e) { - e.printStackTrace(); + } catch (DocumentException | SAXException e) { + throw new IllegalArgumentException(e); } - return f; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 2b80b1d86..39ae32053 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -60,7 +60,7 @@ public class SparkUpdateProjectInfo implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } @@ -77,9 +77,7 @@ public class SparkUpdateProjectInfo implements Serializable { "left") .map((MapFunction, CommunityResult>) value -> { CommunityResult r 
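Both getMap and getFunder above now disable DOCTYPE declarations before parsing profiles pulled from the Information System, the usual dom4j defence against XML external entity (XXE) injection. A self-contained sketch of the hardened parse; the XML literal is invented for the example:

import java.io.StringReader;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

public class SafeSaxReaderSketch {
	public static void main(String[] args) throws DocumentException, SAXException {
		final SAXReader reader = new SAXReader();
		// refusing DOCTYPE declarations blocks XXE payloads that rely on external entities
		reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);

		Document doc = reader.read(new StringReader("<community id=\"egi\" label=\"EGI Federation\"/>"));
		System.out.println(doc.getRootElement().attributeValue("id"));
	}
}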
= value._1(); - Optional.ofNullable(value._2()).ifPresent(rp -> { - r.setProjects(rp.getProjectsList()); - }); + Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; }, Encoders.bean(CommunityResult.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java index eb546624e..57708a78d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java @@ -23,5 +23,4 @@ public class Constants implements Serializable { public static final String CONTEXT_NS_PREFIX = "context_____"; public static final String UNKNOWN = "UNKNOWN"; - // public static final String FUNDER_DS = "entityregistry::projects"; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index ccb84c713..120de9327 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import java.util.function.Function; -import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,15 +16,13 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. 
It queries the Information System at the lookup url provided as parameter and @@ -33,8 +33,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; public class CreateContextEntities implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; + private final transient Configuration conf; + private final transient BufferedWriter writer; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -88,7 +88,7 @@ public class CreateContextEntities implements Serializable { } public void execute(final Function producer, String isLookUpUrl) - throws Exception { + throws ISLookUpException { QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); @@ -101,10 +101,9 @@ public class CreateContextEntities implements Serializable { protected void writeEntity(final R r) { try { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - // log.info("writing context : {}", new Gson().toJson(r)); writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315..10f2014d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -7,6 +7,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; @@ -31,20 +32,21 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; * and the project is not created because of a low coverage in the profiles of openaire ids related to projects */ public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; - private final QueryInformationSystem queryInformationSystem; + private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); + private final transient Configuration conf; + private final transient BufferedWriter writer; + private final transient QueryInformationSystem queryInformationSystem; private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + Objects + .requireNonNull( + CreateContextRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,10 +72,6 @@ public class CreateContextRelation 
implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); @@ -107,7 +105,7 @@ public class CreateContextRelation implements Serializable { public void execute(final Function> producer, String category, String prefix) { - final Consumer consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c)); + final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); queryInformationSystem.getContextRelation(consumer, category, prefix); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66..b0b8b8acd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -6,8 +6,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -21,8 +19,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.*; * context entity and datasource/projects related to the context. */ public class Process implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Process.class); + @SuppressWarnings("unchecked") public static R getEntity(ContextInfo ci) { try { ResearchInitiative ri; @@ -35,7 +33,7 @@ public class Process implements Serializable { ri.setType(Constants.RESEARCH_INFRASTRUCTURE); } ri.setId(Utils.getContextId(ci.getId())); - ri.setOriginalId(ci.getId()); + ri.setAcronym(ci.getId()); ri.setDescription(ci.getDescription()); ri.setName(ci.getName()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a5..0ed5de67c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -11,6 +11,7 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -84,8 +85,9 @@ public class QueryInformationSystem { final Document doc; try { - - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); @@ -102,7 +104,7 @@ public class QueryInformationSystem { } consumer.accept(cinfo); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { e.printStackTrace(); } diff --git 
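CreateContextRelation above wraps getResourceAsStream in Objects.requireNonNull so a missing parameter file fails fast with a descriptive exception instead of a late NullPointerException inside IOUtils. A small sketch of that loading pattern; the resource path is hypothetical, so running it as-is simply demonstrates the fail-fast behaviour:

import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.commons.io.IOUtils;

public class ResourceLoadingSketch {
	public static void main(String[] args) throws Exception {
		String jsonConfiguration = IOUtils
			.toString(
				Objects
					.requireNonNull(
						ResourceLoadingSketch.class
							.getResourceAsStream("/eu/dnetlib/dhp/example/input_parameter.json"),
						"parameter file not found on the classpath"),
				StandardCharsets.UTF_8);
		System.out.println(jsonConfiguration);
	}
}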
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 868fa89fe..4365e861f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -61,7 +61,7 @@ public class SparkOrganizationRelation implements Serializable { log.info("organization map : {}", new Gson().toJson(organizationMap)); final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {} ", communityMapPath); + log.info("communityMapPath: {}", communityMapPath); SparkConf conf = new SparkConf(); @@ -117,15 +117,12 @@ public class SparkOrganizationRelation implements Serializable { } })); - // if (relList.size() > 0) { spark .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - // } - } @NotNull diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14..d8a1b5c21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -14,16 +16,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.zenodo.Community; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f4..2d2b04b5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; 
import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -62,6 +61,7 @@ public class SparkResultLinkedToProject implements Serializable { final String relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); + @SuppressWarnings("unchecked") Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -95,9 +95,9 @@ public class SparkResultLinkedToProject implements Serializable { ._2() .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + .mapGroups( + (MapGroupsFunction, R>) (k, it) -> it.next()._2(), + Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 000000000..ce383292c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,142 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (s1.startsWith("10|")){ + return s1} + s2 + } + + def getValue(s1:String, s2:String) : String = { + if(!s1.equals("")){ + return s1 + } + s2 + } + + + def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { + val transformedData : Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:(String, HostedByItemType)) => res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) + override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { + return merge(b, a) + } + override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(b1._2.id.startsWith("10|")){ + return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction + override def bufferEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + + 
override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + }.toColumn + + + + + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHostedById.equals("")){ + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + return b1 + } + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getId)(Encoders.STRING) + .agg(Aggregators.resultToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } + + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHostedById.equals("")){ + return b1 + } + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + + def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getHostedById)(Encoders.STRING) + .agg(Aggregators.datasourceToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java new file mode 100644 index 000000000..b29877a48 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +public class Constants { + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java new file mode 100644 index 000000000..dff761c34 --- /dev/null +++ 
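The hostedByAggregator above keeps, for each key, the tuple whose datasource id carries the "10|" prefix and ORs the openAccess flags of everything it merges. The same precedence rule in plain Java, detached from Spark, with a tiny holder class and identifiers invented for the illustration:

public class HostedByMergeSketch {

	// invented minimal holder mirroring HostedByItemType's id / openAccess fields
	static final class Item {
		final String id;
		final boolean openAccess;

		Item(String id, boolean openAccess) {
			this.id = id;
			this.openAccess = openAccess;
		}
	}

	static Item merge(Item a, Item b) {
		if (a == null) {
			return b;
		}
		if (b == null) {
			return a;
		}
		boolean oa = a.openAccess || b.openAccess; // open access survives whichever side wins
		// prefer the entry that already carries a datasource identifier ("10|" prefix)
		return a.id.startsWith("10|") ? new Item(a.id, oa) : new Item(b.id, oa);
	}

	public static void main(String[] args) {
		Item fromJournal = new Item("issn:1234-5678", true);
		Item fromDatasource = new Item("10|doajarticles::abc", false);
		Item merged = merge(fromJournal, fromDatasource);
		System.out.println(merged.id + " openAccess=" + merged.openAccess); // 10|doajarticles::abc openAccess=true
	}
}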
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -0,0 +1,95 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + + } + + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { + + final HttpConnector2 connector2 = new HttpConnector2(); + + final Path path = new Path(workingPath + "/replaced.csv"); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (PrintWriter writer = new PrintWriter( + new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); + } + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java new file mode 100644 index 000000000..d82d00862 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import 
java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV2 { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String tmpFile = parser.get("tmpFile"); + log.info("tmpFile {}", tmpFile); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + HttpConnector2 connector2 = new HttpConnector2(); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); + } finally { + FileUtils.deleteQuietly(new File(tmpFile)); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala new file mode 100644 index 000000000..1b18ba3ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -0,0 +1,71 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +object SparkApplyHostedByMapToDatasource { + + def applyHBtoDats(join: Dataset[EntityInfo], dats: 
Dataset[Datasource]): Dataset[Datasource] = { + dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") + .map(t2 => { + val d: Datasource = t2._1 + if (t2._2 != null) { + if (d.getOpenairecompatibility.getClassid.equals(ModelConstants.UNKNOWN)) { + d.getOpenairecompatibility.setClassid("hostedBy") + d.getOpenairecompatibility.setClassname("collected from a compatible aggregator") + } + } + d + })(Encoders.bean((classOf[Datasource]))) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + val graphPath = parser.get("graphPath") + val outputPath = parser.get("outputPath") + val preparedInfoPath = parser.get("preparedInfoPath") + + implicit val formats = DefaultFormats + + implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val mapper = new ObjectMapper() + + val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") + .map(r => mapper.readValue(r, classOf[Datasource])) + + val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) + + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/datasource") + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala new file mode 100644 index 000000000..0e047d016 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -0,0 +1,86 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + + +object SparkApplyHostedByMapToResult { + + def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { + pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + .map(t2 => { + val p: Publication = t2._1 + if (t2._2 != null) { + val ei: EntityInfo = t2._2 + val i = p.getInstance().asScala + if (i.size == 1) { + val inst: Instance = i(0) + inst.getHostedby.setKey(ei.getHostedById) + inst.getHostedby.setValue(ei.getName) + if (ei.getOpenAccess) { + 
inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) + p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); + } + + } + } + p + })(Encoders.bean(classOf[Publication])) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val preparedInfoPath = parser.get("preparedInfoPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") + .map(r => mapper.readValue(r, classOf[Publication])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/publication") + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala new file mode 100644 index 000000000..b7a7d352f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -0,0 +1,125 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo + +import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} + + + +object SparkPrepareHostedByInfoToApply { + + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { + var lst:List[EntityInfo] = List() + + + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst + } + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst + } + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst + } + lst + } + + def prepareResultInfo(spark:SparkSession, 
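getList above turns a Journal into one EntityInfo per non-blank ISSN variant (linking, online, printed), so a publication can later match the hosted-by map on any of them. A plain-Java version of that fan-out, with a hypothetical JournalIds holder standing in for the OAF Journal bean:

import java.util.ArrayList;
import java.util.List;

public class IssnFanOutSketch {

	// hypothetical, trimmed-down stand-in for eu.dnetlib.dhp.schema.oaf.Journal
	static final class JournalIds {
		final String issnPrinted;
		final String issnOnline;
		final String issnLinking;

		JournalIds(String issnPrinted, String issnOnline, String issnLinking) {
			this.issnPrinted = issnPrinted;
			this.issnOnline = issnOnline;
			this.issnLinking = issnLinking;
		}
	}

	static List<String[]> fanOut(String resultId, JournalIds j) {
		List<String[]> entries = new ArrayList<>();
		// one (resultId, issn) pair per non-blank identifier, mirroring getList
		if (j.issnLinking != null && !j.issnLinking.isEmpty()) {
			entries.add(new String[] { resultId, j.issnLinking });
		}
		if (j.issnOnline != null && !j.issnOnline.isEmpty()) {
			entries.add(new String[] { resultId, j.issnOnline });
		}
		if (j.issnPrinted != null && !j.issnPrinted.isEmpty()) {
			entries.add(new String[] { resultId, j.issnPrinted });
		}
		return entries;
	}

	public static void main(String[] args) {
		JournalIds journal = new JournalIds("1234-5678", "8765-4321", null);
		fanOut("50|result::0001", journal).forEach(e -> System.out.println(e[0] + " -> " + e[1]));
	}
}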
publicationPath:String) : Dataset[EntityInfo] = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + .map(r => mapper.readValue(r, classOf[Publication])) + + dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + + } + + + def toEntityInfo(input:String): EntityInfo = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + toEntityItem(c.keys.head, c.values.head) + } + + + def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + + EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) + + } + + def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) + res.setName(ds.getName) + } + res + })) + } + + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("preparedInfoPath") + val hostedByMapPath = parser.get("hostedByMapPath") + + + implicit val formats = DefaultFormats + + + logger.info("Getting the Datasources") + + import spark.implicits._ + + + //STEP1: read the hostedbymap and transform it in EntityInfo + val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) + + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") + + //STEP3: left join resultInfo with hostedByInfo on journal_id. 
Reduction of all the results with the same id into just + //one entry (one result could be associated to issn and eissn and so possibly matching more than once against the map) + //to this entry we add the id of the datasource for the next step + joinResHBM(resultInfoDataset, hostedByInfo) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + + + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala new file mode 100644 index 000000000..1ee1d5d1a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -0,0 +1,222 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} +import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import java.io.PrintWriter + +import org.apache.hadoop.io.compress.GzipCodec + + +object SparkProduceHostedByMap { + + + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + val openaire: HostedByInfo = input._1._1 + val doaj: HostedByInfo = input._1._2 + val gold: HostedByInfo = input._2 + val isOpenAccess: Boolean = doaj == null && gold == null + + openaire.journal_id match { + case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + + // catch the default with a variable so you can print it + case whoa => null + } + } + + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + + Serialization.write(map) + + + } + + + + def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { + if(issn != null){ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, issn, eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, issn, "", issnl , oa) + }else{ + HostedByItemType(id, officialname, issn, "", "" , oa) + } + } + }else{ + if(eissn != null){ + if(issnl != null){ + HostedByItemType(id, officialname, "", eissn, issnl , oa) + }else{ + HostedByItemType(id, officialname, "", eissn, "" , oa) + } + }else{ + if(issnl != null){ + HostedByItemType(id, officialname, "", "", issnl , oa) + }else{ + HostedByItemType("", 
"", "", "", "" , oa) + } + } + } + } + + def oaToHostedbyItemType(dats: Datasource): HostedByItemType = { + if (dats.getJournal != null) { + + return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false) + } + HostedByItemType("","","","","",false) + } + + def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = { + + import spark.implicits._ + + + val mapper = new ObjectMapper() + + implicit var encoderD = Encoders.kryo[Datasource] + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + + def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssnL, true) + } + + + def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[UnibiGoldModel])) + + dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = { + + return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true) + } + + def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = { + import spark.implicits._ + + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + + val mapper = new ObjectMapper() + + val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[DOAJModel])) + + dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals(""))) + + } + + def toList(input: HostedByItemType): List[(String, HostedByItemType)] = { + var lst : List[(String, HostedByItemType)] = List() + if(!input.issn.equals("")){ + lst = (input.issn, input) :: lst + } + if(!input.eissn.equals("")){ + lst = (input.eissn, input) :: lst + } + if(!input.lissn.equals("")){ + lst = (input.lissn, input) :: lst + } + lst + } + + + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs= FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } + finally { + writer.close() + + } + + } + + + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val datasourcePath = parser.get("datasourcePath") + val workingDirPath = parser.get("workingPath") + val outputPath = parser.get("outputPath") + + + implicit val formats = DefaultFormats + + + logger.info("Getting the 
Datasources") + + + Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) + .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) + + + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java new file mode 100644 index 000000000..4b5dc22a6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -0,0 +1,53 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java new file mode 100644 index 000000000..1c6c88fe7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -0,0 +1,67 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class EntityInfo implements Serializable { + private String id; + private String journalId; + private String name; + private Boolean openAccess; + private String hostedById; + + public static EntityInfo newInstance(String id, String journalId, String name) { + return newInstance(id, journalId, name, false); + + } + + public static EntityInfo newInstance(String id, String journalId, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journalId = journalId; + pi.name = name; + pi.openAccess = openaccess; + pi.hostedById = ""; + return pi; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getJournalId() { + return journalId; + } + + public void setJournalId(String journalId) { + this.journalId = journalId; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Boolean getOpenAccess() { + return openAccess; + } + + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; + } + + public String getHostedById() { + return hostedById; 
+ } + + public void setHostedById(String hostedById) { + this.hostedById = hostedById; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..8f24cd615 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issnL; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String titleSource; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssnL() { + return issnL; + } + + public void setIssnL(String issnL) { + this.issnL = issnL; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitleSource() { + return titleSource; + } + + public void setTitleSource(String titleSource) { + this.titleSource = titleSource; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java index 59bdb3914..f87c0eb7a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java @@ -74,24 +74,4 @@ public class DatasourceCompatibilityComparator implements Comparator return lClass.compareTo(rClass); } - /* - * CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY - * ['openaire-cris_1.1']) THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT - * COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - * 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0']) THEN - * 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - * 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - * 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - * 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - * 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT 
COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) - * THEN 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END - */ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index 602213e58..ef419a042 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import java.util.stream.Collectors; -import javax.xml.crypto.Data; - import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -16,7 +14,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -55,9 +51,11 @@ public class MergeGraphTableSparkJob { String jsonConfiguration = IOUtils .toString( - CleanGraphSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json")); + Objects + .requireNonNull( + MergeGraphTableSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -133,7 +131,7 @@ public class MergeGraphTableSparkJob { HashSet collectedFromNames = Optional .ofNullable(o.getCollectedfrom()) .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet()); + .orElse(new HashSet<>()); return !collectedFromNames.contains("Datacite"); }) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 03c3eeb3c..526f45f6e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -28,12 +28,7 @@ import java.util.Optional; import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.dom4j.Document; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.dom4j.*; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -88,8 +83,6 @@ public abstract class 
AbstractMdRecordToOafMapper { protected static final Map nsContext = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class); - static { nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); @@ -116,66 +109,56 @@ public abstract class AbstractMdRecordToOafMapper { this.forceOriginalId = false; } - public List processMdRecord(final String xml) { + public List processMdRecord(final String xml) throws DocumentException { - // log.info("Processing record: " + xml); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - try { - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + final Document doc = DocumentHelper + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - - if (collectedFrom == null) { - return null; - } - - final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - - if (hostedBy == null) { - return null; - } - - final DataInfo info = prepareDataInfo(doc, invisible); - final long lastUpdateTimestamp = new Date().getTime(); - - final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); - - final String type = getResultType(doc, instances); - - return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - } catch (final Exception e) { - throw new RuntimeException(e); + if (collectedFrom == null) { + return Lists.newArrayList(); } + + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + + if (hostedBy == null) { + return Lists.newArrayList(); + } + + final DataInfo info = prepareDataInfo(doc, invisible); + final long lastUpdateTimestamp = new Date().getTime(); + + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + + final String type = getResultType(doc, instances); + + return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } protected String getResultType(final Document doc, final List instances) { final String type = doc.valueOf("//dr:CobjCategory/@type"); - if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() - .map(s -> UNKNOWN.equalsIgnoreCase(s) ? 
"0000" : s) + .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) .orElse("0000"); // Unknown return Optional .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(q -> q.getClassid()) + .map(Qualifier::getClassid) .orElse("0000"); - /* - * .orElseThrow( () -> new IllegalArgumentException( String.format("'%s' not mapped in %s", instanceType, - * DNET_RESULT_TYPOLOGIES))); - */ } return type; @@ -185,7 +168,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } @@ -498,7 +481,6 @@ public abstract class AbstractMdRecordToOafMapper { accessRight.setSchemename(qualifier.getSchemename()); // TODO set the OAStatus - // accessRight.setOaStatus(...); return accessRight; } @@ -511,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper { return vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final DataInfo info) { - final List res = new ArrayList<>(); - - for (final Object o : node.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId).trim(); - res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); - } - return res; - } - protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index bbfb7429f..6bb18c375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -17,6 +17,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -127,7 +128,7 @@ public class GenerateEntitiesApplication { .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .flatMap(List::iterator)); } switch (mode) { @@ -135,7 +136,7 @@ public class GenerateEntitiesApplication { save( inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) - .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2)) + .reduceByKey(OafMapperUtils::merge) .map(Tuple2::_2), targetPath); break; @@ -158,7 +159,7 @@ public class GenerateEntitiesApplication { final String id, final String s, final boolean shouldHashId, - final VocabularyGroup vocs) { + final VocabularyGroup vocs) throws DocumentException { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { @@ -191,7 +192,7 @@ public class GenerateEntitiesApplication { case "otherresearchproduct": return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + throw new IllegalArgumentException("type not 
managed: " + type.toLowerCase()); } } @@ -199,9 +200,9 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: " + clazz); + log.error("Error parsing object of class: {}", clazz); log.error(s); - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index d5c310c1b..ee1b6a5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -40,9 +40,11 @@ public class MergeClaimsApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + Objects + .requireNonNull( + MergeClaimsApplication.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..d78732f9b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,8 +1,41 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import 
static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.Closeable; import java.io.IOException; @@ -53,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); private static final DataInfo DATA_INFO_CLAIM = dataInfo( - false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); + false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + "0.9"); private static final List COLLECTED_FROM_CLAIM = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); @@ -140,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs_dedup: // generates organization entities and relations for openorgs dedup log.info("Processing Openorgs..."); smdbe - .execute( - "queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Sim Rels..."); smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels); @@ -150,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs: // generates organization entities and relations for provision log.info("Processing Openorgs For Provision..."); smdbe - .execute( - "queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Merge Rels..."); smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels); @@ -229,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); + ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setEnglishname(field(rs.getString("englishname"), 
info)); @@ -270,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setDataInfo(info); ds.setLastupdatetimestamp(lastUpdateTimestamp); + ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); + ds.setThematic(rs.getBoolean("thematic")); + ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); + ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + return Arrays.asList(ds); } catch (final Exception e) { throw new RuntimeException(e); @@ -495,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i throw new IllegalStateException( String .format( - "invalid claim, sourceId: %s, targetId: %s, semantics: %s", - sourceId, targetId, semantics)); + "invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId, + semantics)); } r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES); r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); @@ -516,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - private Relation prepareRelation(String sourceId, String targetId, String validationDate) { - Relation r = new Relation(); + private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) { + final Relation r = new Relation(); if (StringUtils.isNotBlank(validationDate)) { r.setValidated(true); r.setValidationDate(validationDate); @@ -530,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return r; } - private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) { + private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType, + final String relClass) { r.setRelType(relType); r.setSubRelType(subRelType); r.setRelClass(relClass); @@ -603,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } + private List prepareListOfQualifiers(final Array array) throws SQLException { + final List res = new ArrayList<>(); + if (array != null) { + for (final String s : (String[]) array.getArray()) { + final Qualifier q = prepareQualifierSplitting(s); + if (q != null) { + res.add(q); + } + } + } + return res; + } + public List processOrgOrgMergeRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO @@ -660,16 +711,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setLastupdatetimestamp(lastUpdateTimestamp); // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); + // final Relation r2 = new Relation(); + // r2.setRelType(ORG_ORG_RELTYPE); + // r2.setSubRelType(ORG_ORG_SUBRELTYPE); + // r2.setRelClass(relClass); + // r2.setSource(orgId2); + // r2.setTarget(orgId1); + // r2.setCollectedfrom(collectedFrom); + // r2.setDataInfo(info); + // r2.setLastupdatetimestamp(lastUpdateTimestamp); + // return Arrays.asList(r1, r2); return Arrays.asList(r1); } catch (final Exception e) { diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 1d4eca2c2..4110bd806 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -82,11 +83,11 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication public static void processPaths(final SparkSession spark, final String outputPath, final Set paths, - final String type) throws Exception { + final String type) { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - log.info("Found " + paths.size() + " not empty mdstores"); + log.info("Found {} not empty mdstores", paths.size()); paths.forEach(log::info); final String[] validPaths = paths @@ -98,7 +99,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication spark .read() .parquet(validPaths) - .map((MapFunction) r -> enrichRecord(r), Encoders.STRING()) + .map((MapFunction) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .toJavaRDD() .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) // .coalesce(1) @@ -120,7 +121,9 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication final String tranDate = dateFormat.format(new Date((Long) r.getAs("dateOfTransformation"))); try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); @@ -135,8 +138,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication private static Set mdstorePaths(final String mdstoreManagerUrl, final String format, final String layout, - final String interpretation) - throws Exception { + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 3f6afbeac..6dbab96cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -51,10 +51,10 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio public void execute(final String format, final String layout, final String interpretation) { final Map colls = mdstoreClient.validCollections(format, layout, 
interpretation); - log.info("Found " + colls.size() + " mdstores"); + log.info("Found {} mdstores", colls.size()); for (final Entry entry : colls.entrySet()) { - log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")"); + log.info("Processing mdstore {} (collection: {})", entry.getKey(), entry.getValue()); final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index d753cddeb..b7afd3595 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -19,7 +21,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -56,8 +57,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); author.setPid(new ArrayList<>()); @@ -165,6 +166,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .filter(n -> StringUtils.isNotBlank(n.getText())) .map(n -> n.getText().trim()) .filter(u -> u.startsWith("http")) + .map(s -> { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (Throwable t) { + return s; + } + }) .distinct() .collect(Collectors.toCollection(ArrayList::new))); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7925a7826..194715295 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.*; import java.util.stream.Collectors; @@ -13,7 +15,6 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -88,11 +89,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); if 
(type.toLowerCase().startsWith(ORCID)) { - final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { res.add(structuredProperty(id, MAG_PID_TYPE, info)); @@ -138,17 +139,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Set url = new HashSet<>(); for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { @@ -164,6 +165,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return Arrays.asList(instance); } + protected String trimAndDecodeUrl(String url) { + try { + return URLDecoder.decode(url.trim(), "UTF-8"); + } catch (Throwable t) { + return url; + } + } + @Override protected List> prepareSources(final Document doc, final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? 
@@ -388,7 +397,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 5523863ff..edfd65299 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -4,12 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index a0ce4f5a6..5d32fe926 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; @@ -34,7 +35,7 @@ public class AbstractMigrationApplication implements Closeable { this.writer = null; } - public AbstractMigrationApplication(final String hdfsPath) throws Exception { + public AbstractMigrationApplication(final String hdfsPath) throws IOException { log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath)); @@ -46,15 +47,14 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } - private Configuration getConf() throws IOException { - final Configuration conf = new Configuration(); + private Configuration getConf() { + return new Configuration(); /* * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", * org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", * org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser); * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf); */ - return conf; } protected void emit(final String s, final String type) { @@ -62,16 +62,16 @@ public class AbstractMigrationApplication implements Closeable { key.set(counter.getAndIncrement() + ":" + type); value.set(s); writer.append(key, value); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalStateException(e); } } protected void emitOaf(final Oaf oaf) { try { emit(objectMapper.writeValueAsString(oaf), 
oaf.getClass().getSimpleName().toLowerCase()); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (JsonProcessingException e) { + throw new IllegalStateException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index b2fddec20..1b13b81c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -80,7 +80,7 @@ object SparkResolveRelation { } - private def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { + def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val id:String = (json \ "id").extract[String] @@ -90,7 +90,15 @@ object SparkResolveRelation { JField("qualifier", JObject(qualifier)) <- pids JField("classname", JString(pidType)) <- qualifier } yield (pidValue, pidType) - (id,result) + + val alternateIds: List[(String,String)] = for { + JObject(pids) <- json \\ "alternateIdentifier" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + + (id,result:::alternateIds) } private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, entityPath: String, workingPath: String) = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index 9a49deebc..202eb7b14 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed - import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import java.util.regex.Pattern import scala.collection.JavaConverters._ object PubMedToOaf { @@ -15,6 +15,20 @@ object PubMedToOaf { "doi" -> "https://dx.doi.org/" ) + def cleanDoi(doi:String):String = { + + val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" + + + val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) + val matcher = pattern.matcher(doi) + + if (matcher.find) { + return matcher.group(0) + } + null + } + def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { @@ -60,8 +74,12 @@ object PubMedToOaf { var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) if (pidList == null) return null + + var alternateIdentifier :StructuredProperty = null if 
(article.getDoi != null) { - pidList = pidList ::: List(OafMapperUtils.structuredProperty(article.getDoi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val normalizedPid = cleanDoi(article.getDoi) + if (normalizedPid!= null) + alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) + } // If the article contains the typology Journal Article then we apply this type @@ -84,9 +102,9 @@ object PubMedToOaf { return result result.setDataInfo(dataInfo) i.setPid(pidList.asJava) + if (alternateIdentifier!= null) + i.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(i).asJava) - - i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut) val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala index 26efd723f..ee76cf8ad 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala @@ -6,18 +6,130 @@ import eu.dnetlib.dhp.schema.oaf.Result import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} +import java.io.InputStream import scala.io.Source import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { + def requestBaseLineUpdatePage(maxFile:String):List[(String,String)] = { + val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + + val result =data.lines.filter(l => l.startsWith("<a href")).map{l => + val end = l.lastIndexOf("\">") + val start = l.indexOf("<a href=\"") + + if (start>= 0 && end >start) + l.substring(start+9, (end-start)) + else + "" + }.filter(s =>s.endsWith(".gz") ).filter(s => s > maxFile).map(s => (s,s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + + result + } + + + def downloadBaselinePart(url:String):InputStream = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + response.getEntity.getContent + + } + + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + 
.setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + } + + + + + + + def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + + + val conf = new Configuration + conf.set("fs.defaultFS", hdfsServerUri) + val fs = FileSystem.get(conf) + val p = new Path(baselinePath) + val files = fs.listFiles(p,false) + var max_file = "" + while (files.hasNext) { + val c = files.next() + val data = c.getPath.toString + val fileName = data.substring(data.lastIndexOf("/")+1) + + if (fileName> max_file) + max_file = fileName + } + + val files_to_download = requestBaseLineUpdatePage(max_file) + + files_to_download.foreach { u => + val hdfsWritePath: Path = new Path(s"$baselinePath/${u._1}") + val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true) + val i = downloadBaselinePart(u._2) + val buffer = Array.fill[Byte](1024)(0) + while(i.read(buffer)>0) { + fsDataOutputStream.write(buffer) + } + i.close() + println(s"Downloaded ${u._2} into $baselinePath/${u._1}") + fsDataOutputStream.close() + } + + } + + val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { override def zero: PMArticle = new PMArticle @@ -51,6 +163,10 @@ object SparkCreateBaselineDataFrame { val targetPath = parser.get("targetPath") log.info("targetPath: {}", targetPath) + val hdfsServerUri = parser.get("hdfsServerUri") + log.info("hdfsServerUri: {}", targetPath) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) val spark: SparkSession = @@ -61,16 +177,15 @@ object SparkCreateBaselineDataFrame { .master(parser.get("master")).getOrCreate() import spark.implicits._ - val sc = spark.sparkContext - - implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000) val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{ val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) @@ -87,7 +202,5 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] .filter(p => p!= null) .write.mode(SaveMode.Overwrite).save(targetPath) - - //s"$workingPath/oaf/baseline_oaf" } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala new file mode 100644 index 000000000..e940fdff0 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -0,0 +1,116 @@ +package eu.dnetlib.dhp.sx.graph.ebi + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClientBuilder +import org.apache.spark.SparkConf +import org.apache.spark.sql.functions.max +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +object SparkDownloadEBILinks { + + def createEBILinks(pmid:Long):EBILinkItem = { + + val res = requestLinks(pmid) + if (res!=null) + return EBILinkItem(pmid, res) + null + } + + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + } + + def requestLinks(PMID:Long):String = { + requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") + + } + def main(args: Array[String]): Unit = { + + val log: Logger = LoggerFactory.getLogger(getClass) + val MAX_ITEM_PER_PARTITION = 20000 + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(SparkEBILinksToOaf.getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) + implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) + implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val workingPath = parser.get("workingPath") + log.info(s"workingPath -> $workingPath") + + log.info("Getting max pubmedId where the links have been requested") + val links:Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val lastPMIDRequested =links.map(l => l.id).select(max("value")).first.getLong(0) + + log.info("Retrieving PMID to request links") + val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] + pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") + + val pmidToReq:Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] + + val total = pmidToReq.count() + + 
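+ // Repartition the pending ids so that each partition holds at most MAX_ITEM_PER_PARTITION (20000) of them,
+ // then resolve every pmid through createEBILinks (the EuropePMC datalinks REST endpoint) and keep only the
+ // non-null responses. For example, 1,000,000 pending ids give (1000000 / 20000).toInt = 50 partitions.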
spark.createDataset(pmidToReq.rdd.repartition((total/MAX_ITEM_PER_PARTITION).toInt).map(pmid =>createEBILinks(pmid)).filter(l => l!= null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + + val updates:Dataset[EBILinkItem] =spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + + links.union(updates).groupByKey(_.id) + .reduceGroups{(x,y) => + if (x == null || x.links ==null) + y + if (y ==null || y.links ==null) + x + if (x.links.length > y.links.length) + x + else + y + }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala index f14e5f264..1924d919e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala @@ -32,14 +32,9 @@ object SparkEBILinksToOaf { import spark.implicits._ implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem] - - ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset") - val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null) ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links)) - .repartition(4000) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) .write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index ea483a4a7..46e0eb5e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, 
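+ -- every branch of the UNION ALL now also selects the new measures column, so the ${hiveDbName}.result
+ -- view exposes it for publications, datasets, software and other research products alike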
resourcetype, context, externalreference, instance, measures from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json new file mode 100644 index 000000000..50fbb00f0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -0,0 +1,40 @@ +[ + { + "paramName":"fu", + "paramLongName":"fileURL", + "paramDescription": "the url to download the csv file ", + "paramRequired": true + }, + { + "paramName":"tf", + "paramLongName":"tmpFile", + "paramDescription": "the temporary local file storing the cleaned CSV contents for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName":"of", + "paramLongName":"outputFile", + "paramDescription": "the output json file produced by the CSV downlaod procedure", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "csv delimiter character", + "paramRequired": false + } +] + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json new file mode 100644 index 000000000..e34398290 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json @@ -0,0 +1,36 @@ + +[ + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, 
false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json new file mode 100644 index 000000000..9173b78ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json @@ -0,0 +1,38 @@ + +[ + + { + "paramName":"dsp", + "paramLongName":"datasourcePath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json new file mode 100644 index 000000000..9809d71b5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"hbmp", + "paramLongName":"hostedByMapPath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml @@ 
-0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml new file mode 100644 index 000000000..84035fe4e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -0,0 +1,238 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + hostedByMapPath + the output path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'download_csv'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --tmpFile/tmp/unibi_gold_replaced.csv + --outputFile${workingDir}/unibi_gold.json + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 + --hdfsNameNode${nameNode} + --fileURL${doajFileURL} + --tmpFile/tmp/doaj_replaced.csv + --outputFile${workingDir}/doaj.json + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + + + + + + + + + + yarn-cluster + Produce the new HostedByMap + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --datasourcePath${sourcePath}/datasource + --workingPath/user/${wf:user()}/data + --outputPath${hostedByMapPath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Prepare info to apply the hbm + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} 
+ --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hostedByMapPath${hostedByMapPath} + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + + + yarn-cluster + Apply hbm to result + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/publication + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Apply hbm to datasource + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/datasource + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f0a4161ab..98092e882 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -84,13 +84,18 @@ SELECT dc.id AS collectedfromid, dc.officialname AS collectedfromname, d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, - d.lissn AS issnLinking + d.lissn AS issnLinking, + de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, + de.thematic AS thematic, + de.knowledge_graph AS knowledgegraph, + array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies FROM dsm_datasources d - +LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) @@ -126,4 +131,8 @@ GROUP BY dc.officialname, d.issn, d.eissn, - d.lissn + d.lissn, + de.jurisdiction, + de.thematic, + de.knowledge_graph, + 
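+ -- the EOSC attributes selected above come from the new LEFT OUTER JOIN on dsm_datasources_eosc,
+ -- so they have to be listed in the GROUP BY together with the pre-existing datasource columns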
de.content_policies diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json index 38eb50094..4bee770bd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json @@ -1,6 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"i", "paramLongName":"isLookupUrl","paramDescription": "isLookupUrl", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the oaf path ", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json new file mode 100644 index 000000000..0860ed558 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml index 3f442c5c6..7612321c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml @@ -25,7 +25,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - yarn-cluster @@ -43,6 +42,7 @@ --workingPath${workingPath} --masteryarn + --hdfsServerUri${nameNode} @@ -74,7 +74,7 @@ yarn-cluster cluster - Create Baselnie DataSet + Create Baseline DataSet eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates dhp-graph-mapper-${projectVersion}.jar diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml new file mode 100644 index 000000000..17cd6c9a3 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml @@ -0,0 +1,68 @@ + + + + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + + + + + + + + + + + + + + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml new file mode 100644 index 000000000..cd3bb8c71 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml @@ -0,0 +1,67 @@ + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + Incremental Download EBI Links + eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index 32f6e7abc..afaac04ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -68,7 +68,7 @@ public class GraphHiveImporterJobTest { } @Test - public void testImportGraphAsHiveDB() throws Exception { + void testImportGraphAsHiveDB() throws Exception { GraphHiveImporterJob .main( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b196d1948..42d9f226c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.*; import static 
org.mockito.Mockito.lenient; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -16,12 +17,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -29,7 +30,8 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class GraphCleaningFunctionsTest { - public static final ObjectMapper MAPPER = new ObjectMapper(); + public static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Mock private ISLookUpService isLookUpService; @@ -50,7 +52,24 @@ public class GraphCleaningFunctionsTest { } @Test - public void testCleaning() throws Exception { + void testCleanRelations() throws Exception { + + List lines = IOUtils + .readLines(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/relation.json")); + for (String json : lines) { + Relation r_in = MAPPER.readValue(json, Relation.class); + assertNotNull(r_in); + + assertFalse(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_in.getRelClass())); + + Relation r_out = OafCleaner.apply(r_in, mapping); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + } + } + + @Test + void testCleaning() throws Exception { assertNotNull(vocabularies); assertNotNull(mapping); @@ -87,7 +106,7 @@ public class GraphCleaningFunctionsTest { p_out .getPid() .stream() - .map(p -> p.getQualifier()) + .map(StructuredProperty::getQualifier) .allMatch(q -> pidTerms.contains(q.getClassid()))); List poi = p_out.getInstance(); @@ -101,8 +120,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, poii.getPid().size()); assertTrue( - poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(poii.getAlternateIdentifier()); assertEquals(2, poii.getAlternateIdentifier().size()); @@ -111,16 +130,12 @@ public class GraphCleaningFunctionsTest { poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1007/s109090161569x")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); assertTrue( poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); @@ -142,8 +157,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, 
pcii.getPid().size()); assertTrue( - pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(pcii.getAlternateIdentifier()); assertEquals(1, pcii.getAlternateIdentifier().size()); @@ -151,9 +166,7 @@ public class GraphCleaningFunctionsTest { pcii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); getAuthorPids(p_cleaned).forEach(pid -> { System.out @@ -166,27 +179,23 @@ public class GraphCleaningFunctionsTest { // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); - - /* - * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); - */ } private Stream getAuthorPidTypes(Result pub) { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()) - .map(s -> s.getQualifier()); + .map(Author::getPid) + .flatMap(Collection::stream) + .map(StructuredProperty::getQualifier); } private Stream getAuthorPids(Result pub) { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()); + .map(Author::getPid) + .flatMap(Collection::stream); } private List vocs() throws IOException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 803ae0416..697ec705f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -10,10 +10,10 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; @Disabled -public class GenerateJsonSchema { +class GenerateJsonSchema { @Test - public void generateSchema() { + void generateSchema() { SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(Option.SCHEMA_VERSION_INDICATOR) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java index 51e4e1033..41b906e58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java @@ -22,7 +22,7 @@ public class MakeTarTest { } @Test - public void testTar() throws IOException { + void testTar() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8dd..03eed7a45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -15,7 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -37,8 +37,6 @@ public class PrepareResultProjectJobTest { private static final Logger log = LoggerFactory .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); - private static final HashMap map = new HashMap<>(); - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -69,7 +67,7 @@ public class PrepareResultProjectJobTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match") @@ -90,12 +88,12 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertEquals(0, verificationDataset.count()); + assertEquals(0, verificationDataset.count()); } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one") @@ -116,12 +114,11 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 1); + assertEquals(1, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); verificationDataset.createOrReplaceTempView("table"); @@ -131,14 +128,14 @@ public class PrepareResultProjectJobTest { "from table " + "lateral view explode (projectsList) pl as projList"); - Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); verificationDataset.show(false); } @Test - public void testMatch() throws Exception { + void testMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match") @@ -159,16 +156,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + 
assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -177,62 +172,54 @@ public class PrepareResultProjectJobTest { + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a..98902b618 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -14,12 +14,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -66,7 +67,7 @@ public class QueryInformationSystemTest { private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException { + public void setUp() throws ISLookUpException, DocumentException, SAXException { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); @@ -74,13 +75,13 @@ public class QueryInformationSystemTest { } @Test - public void testSize() throws ISLookUpException { + void testSize() throws ISLookUpException { Assertions.assertEquals(23, map.size()); } @Test - public void testContent() { + void testContent() { Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a..e6f0e9106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - public void test1() { + void testCommunitySplit() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee0..a164593ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -68,7 +68,7 @@ public class UpdateProjectInfoTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 05dc423cb..8d06758a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -32,7 +32,7 @@ public class ZenodoUploadTest { } 
@Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { CommunityMap communityMap = new CommunityMap(); communityMap.put("ni", "Neuroinformatics"); communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); @@ -86,7 +86,7 @@ public class ZenodoUploadTest { } @Test - public void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -137,7 +137,7 @@ public class ZenodoUploadTest { } @Test - public void readCommunityMap() throws IOException { + void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); System.out .println( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd1894..b5f73da71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -86,7 +86,7 @@ public class CreateEntityTest { } @Test - public void test1() throws ISLookUpException, IOException { + void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); @@ -97,7 +97,7 @@ public class CreateEntityTest { Assertions.assertEquals(12, riList.size()); riList.stream().forEach(c -> { - switch (c.getOriginalId()) { + switch (c.getAcronym()) { case "mes": Assertions .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); @@ -115,9 +115,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); - Assertions.assertTrue("mes".equals(c.getOriginalId())); + Assertions.assertTrue("mes".equals(c.getAcronym())); break; case "clarin": Assertions @@ -130,9 +130,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); - Assertions.assertTrue("clarin".equals(c.getOriginalId())); + Assertions.assertTrue("clarin".equals(c.getAcronym())); break; } // TODO add check for all the others Entities @@ -144,7 +144,7 @@ public class CreateEntityTest { @Test @Disabled - public void test2() throws IOException, ISLookUpException { + void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d6..69e550d45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.utils.DHPUtils; -public class CreateRelationTest { +class CreateRelationTest { List communityContext = Arrays .asList( @@ -473,7 +473,7 @@ public class CreateRelationTest { } @Test - public void test1() { + void test1() { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 3d42f124e..e43383ef4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -60,7 +60,7 @@ public class ExtractRelationFromEntityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java index 75d5a2673..eb5919fd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java @@ -4,13 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder; -public class FunderParsingTest { +class FunderParsingTest { @Test - public void testFunderTwoLevels() throws DocumentException { + void testFunderTwoLevels() throws DocumentException { String funding_Stream = "nsf_________::NSFNSFNational Science " + @@ -37,7 +38,7 @@ public class FunderParsingTest { } @Test - public void testFunderThreeeLevels() throws DocumentException { + void testFunderThreeeLevels() throws DocumentException, SAXException { String funding_stream = "ec__________::EC" + "EC" + "European Commission" + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138..049959704 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -17,7 +17,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -513,7 +513,7 @@ public class QueryInformationSystemTest { } @Test - 
public void testSizeEntity() throws ISLookUpException { + void testSizeEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -523,7 +523,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeRelation() throws ISLookUpException { + void testSizeRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -534,7 +534,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentRelation() throws ISLookUpException { + void testContentRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -572,7 +572,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentEntity() throws ISLookUpException { + void testContentEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index ea2dc73ca..50a9f26b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -69,7 +69,7 @@ public class RelationFromOrganizationTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab3..d1e3b3acc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -69,7 +69,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") @@ -102,7 +102,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d942..8ac0c552f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -65,7 +65,7 @@ public class SplitPerFunderTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() 
.getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java new file mode 100644 index 000000000..edf74fc6a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -0,0 +1,149 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; + +public class DownloadCsvTest { + + private static final Logger log = LoggerFactory.getLogger(DownloadCsvTest.class); + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + 
DOAJModel.class.getName(), + ',', + fs); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + assertEquals("0001-3765", doaj.getIssn()); + assertEquals("1678-2690", doaj.getEissn()); + assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + } + if (count == 22) { + log.info(new ObjectMapper().writeValueAsString(doaj)); + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + } + if (count == 7904) { + // log.info(new ObjectMapper().writeValueAsString(doaj)); + assertEquals("", doaj.getIssn()); + assertEquals("2055-7159", doaj.getEissn()); + assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { + + assertEquals("2783-1043", doaj.getIssn()); + assertEquals("2783-1051", doaj.getEissn()); + assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); + } + + count += 1; + } + + assertEquals(16715, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala new file mode 100644 index 000000000..1bdcb60aa --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -0,0 +1,138 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestApply extends java.io.Serializable{ + + @Test + def testApplyOnResult (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("publication.json").getPath + val hbm = getClass.getResource("preparedInfo.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + + val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(13, pub_ds.count()) + + val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + + assertEquals(13, ds.count) + + val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(13, temp.count()) + temp.foreach(t2 => { + val pb : Publication = t2._1 + val pa : Publication = t2._2 + assertEquals(1, pa.getInstance().size()) + assertEquals(1, pb.getInstance().size()) + assertTrue(t2._1.getId.equals(t2._2.getId)) + 
if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) + assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) + + + assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) + assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getBestaccessright.getClassname.equals("not available")) + + }else{ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) + + } + }) + + spark.close() + } + + + @Test + def testApplyOnDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val dats = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("preparedInfo2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + + val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) + + + assertEquals(10, dats_ds.count()) + + val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + + assertEquals(10, ds.count) + + val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(10, temp.count()) + temp.foreach(t2 => { + val pb : Datasource = t2._1 + val pa : Datasource = t2._2 + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { + assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) + assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a 
compatible aggregator")) + + assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) + + + } else { + assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) + + } + }) + + spark.close() + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala new file mode 100644 index 000000000..a3a753a8a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -0,0 +1,159 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestPrepare extends java.io.Serializable{ + + def getString(input:HostedByItemType):String = { + + import org.json4s.jackson.Serialization.write + implicit val formats = DefaultFormats + + write(input) + } + + + @Test + def testHostedByMaptoEntityInfo() : Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val hbm = getClass.getResource("hostedbymap.json").getPath + + + import spark.implicits._ + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(20, ds.count) + spark.close() + } + + @Test + def testPublicationtoEntityInfo() : Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("publication.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(2, ds.count) + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) + + spark.close() + } + + @Test + def testJoinResHBM (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: 
SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) + assertEquals("0001-396X", ei.getJournalId) + assertEquals("Academic Therapy", ei.getName) + assertTrue(!ei.getOpenAccess) + + spark.close() + } + + @Test + def testJoinResHBM2 (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication2").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) + assertEquals("Academic Therapy", ei.getName) + assertTrue(ei.getOpenAccess) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + spark.close() + } + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala new file mode 100644 index 000000000..5b00e9b6f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -0,0 +1,170 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import eu.dnetlib.dhp.schema.oaf.Datasource +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s.DefaultFormats +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write + +class TestPreprocess extends java.io.Serializable{ + + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource] + implicit val schema = Encoders.product[HostedByInfo] + + + def toHBIString (hbi:HostedByItemType): String = { + implicit val formats = DefaultFormats + + write(hbi) + } + + @Test + def readDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + 
conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) + + assertEquals(9, ds.count) + + assertEquals(8, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) + ds.foreach(hbi => println(toHBIString(hbi))) + spark.close() + } + + + @Test + def readGold():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("unibi_transformed.json").getPath + + + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + + assertEquals(29, ds.count) + + assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) + ds.foreach(hbi => println(toHBIString(hbi))) + + spark.close() + } + + @Test + def readDoaj():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("doaj_transformed.json").getPath + + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) + + assertEquals(25, ds.count) + + assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count 
== 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ))) + ds.foreach(hbi => println(toHBIString(hbi))) + + spark.close() + } + + @Test + def testAggregator() : Unit = { + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + + + val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + assertEquals(106, tmp.count) + assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + + + val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) + + assertEquals(82, ds.count) + + assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) + + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|")) + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) + assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) + + val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + + hbmap.foreach(entry => println(entry)) + spark.close() + + } + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java index 0089811cf..2d28ee305 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; -public class MergeGraphTableSparkJobTest { +class MergeGraphTableSparkJobTest { private ObjectMapper mapper; @@ -25,7 +25,7 @@ public class MergeGraphTableSparkJobTest { } @Test - public void testMergeDatasources() throws IOException { + void testMergeDatasources() throws IOException { assertEquals( "openaire-cris_1.1", MergeGraphTableSparkJob diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index e0d202209..67490a470 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.List; import org.apache.commons.io.IOUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -24,7 +25,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class GenerateEntitiesApplicationTest { +class GenerateEntitiesApplicationTest { @Mock private ISLookUpService isLookUpService; @@ -44,7 +45,7 @@ public class GenerateEntitiesApplicationTest { } @Test - public void testMergeResult() throws IOException { + void testMergeResult() throws IOException, DocumentException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -76,7 +77,8 @@ public class GenerateEntitiesApplicationTest { assertEquals(resultType, merge.getResulttype().getClassid()); } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { + protected Result getResult(String xmlFileName, Class clazz) + throws IOException, DocumentException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c431b4dd8..27e33bf27 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -8,20 +8,22 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -29,7 +31,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class MappersTest { +class MappersTest { @Mock private ISLookUpService isLookUpService; @@ -48,9 +50,9 @@ public class MappersTest { } @Test - void testPublication() throws IOException { + void testPublication() throws IOException, DocumentException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -71,7 +73,7 @@ 
public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); - assertTrue(p.getSource().size() == 1); + assertEquals(1, p.getSource().size()); assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); @@ -88,7 +90,7 @@ public class MappersTest { .getPid() .stream() .findFirst() - .get(); + .orElseThrow(() -> new IllegalStateException("missing author pid")); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -108,7 +110,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -141,17 +142,15 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); - // System.out.println(new ObjectMapper().writeValueAsString(p)); - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test - void testPublication_PubMed() throws IOException { + void testPublication_PubMed() throws IOException, DocumentException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record_pubmed.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -196,23 +195,22 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(p.getPid().size() > 0); - assertEquals(p.getPid().get(0).getValue(), "PMC1517292"); - assertEquals(p.getPid().get(0).getQualifier().getClassid(), "pmc"); + assertEquals("PMC1517292", p.getPid().get(0).getValue()); + assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); - assertTrue(p.getInstance().get(0).getPid().size() == 2); + assertEquals(2, p.getInstance().get(0).getPid().size()); - assertTrue(p.getInstance().get(0).getAlternateIdentifier().size() == 1); + assertEquals(1, p.getInstance().get(0).getAlternateIdentifier().size()); assertEquals("doi", p.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("10.3897/oneeco.2.e13718", p.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); @@ -221,9 +219,9 @@ public class MappersTest { } @Test - void testPublicationInvisible() throws IOException { + void testPublicationInvisible() throws IOException, DocumentException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = 
IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); @@ -237,8 +235,18 @@ public class MappersTest { } @Test - void testDataset() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); + void testOdfFwfEBookLibrary() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_fwfebooklibrary.xml"))); + + assertThrows( + IllegalArgumentException.class, + () -> new OdfToOafMapper(vocs, false, true).processMdRecord(xml)); + } + + @Test + void testDataset() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -281,12 +289,13 @@ public class MappersTest { .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - final StructuredProperty pid = author + final Optional oPid = author .get() .getPid() .stream() - .findFirst() - .get(); + .findFirst(); + assertTrue(oPid.isPresent()); + final StructuredProperty pid = oPid.get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -315,7 +324,6 @@ public class MappersTest { assertTrue(d.getInstance().size() > 0); d .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -345,13 +353,14 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test - void testOdfBielefeld() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_bielefeld.xml")); + void testOdfBielefeld() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -389,7 +398,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -398,8 +406,9 @@ public class MappersTest { } @Test - void testOpentrial() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + void testOpentrial() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -444,7 +453,7 @@ public class MappersTest { assertEquals(1, d.getDescription().size()); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); - assertTrue(d.getAuthor().size() == 1); + assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Kristian K.", 
d.getAuthor().get(0).getName()); assertEquals("Jensen", d.getAuthor().get(0).getSurname()); @@ -462,7 +471,7 @@ public class MappersTest { assertTrue(d.getContext().isEmpty()); assertNotNull(d.getInstance()); - assertTrue(d.getInstance().size() == 1); + assertEquals(1, d.getInstance().size()); final Instance i = d.getInstance().get(0); @@ -514,8 +523,8 @@ public class MappersTest { } @Test - void testSoftware() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); + void testSoftware() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -532,30 +541,23 @@ public class MappersTest { assertTrue(s.getInstance().size() > 0); } - // @Test - void testDataset_2() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset_2.xml")); - - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - } - @Test - void testClaimDedup() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml")); + void testClaimDedup() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(list); + assertFalse(list.isEmpty()); + System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); } @Test - void testNakala() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + void testNakala() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -582,8 +584,8 @@ public class MappersTest { } @Test - void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + void testEnermaps() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -607,8 +609,9 @@ public class MappersTest { } @Test - void testClaimFromCrossref() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); + void testClaimFromCrossref() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -623,8 +626,8 @@ public class MappersTest { } @Test - void testODFRecord() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); + void testODFRecord() throws IOException, DocumentException { + final String xml 
= IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); @@ -637,8 +640,8 @@ public class MappersTest { } @Test - void testTextGrid() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + void testTextGrid() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -652,9 +655,9 @@ public class MappersTest { assertEquals(1, p.getAuthor().size()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertTrue(p.getPid().size() == 1); + assertEquals(1, p.getPid().size()); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); - assertTrue(PidType.handle.equals(PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()))); + assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("dataset", p.getResulttype().getClassname()); assertEquals(1, p.getInstance().size()); @@ -671,8 +674,8 @@ public class MappersTest { } @Test - void testBologna() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + void testBologna() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -688,8 +691,8 @@ public class MappersTest { } @Test - void testJairo() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + void testJairo() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -702,7 +705,7 @@ public class MappersTest { assertNotNull(p.getTitle()); assertFalse(p.getTitle().isEmpty()); - assertTrue(p.getTitle().size() == 1); + assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); final Publication p_cleaned = cleanup(fixVocabularyNames(p)); @@ -712,8 +715,9 @@ public class MappersTest { } @Test - void testOdfFromHdfs() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); + void testOdfFromHdfs() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -749,7 +753,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("UNKNOWN", i.getAccessright().getClassid()); @@ -757,6 +760,40 @@ public class MappersTest { assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } + @Test + void testXMLEncodedURL() throws 
IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml"))); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + String decoded = "https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS"; + assertEquals(decoded, p.getInstance().get(0).getUrl().get(0)); + } + + @Test + void testXMLEncodedURL_ODF() throws IOException, DocumentException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Dataset p = (Dataset) list.get(0); + assertTrue(p.getInstance().size() > 0); + for (String url : p.getInstance().get(0).getUrl()) { + System.out.println(url); + assertTrue(!url.contains("&")); + } + } + private void assertValidId(final String id) { // System.out.println(id); @@ -768,13 +805,16 @@ public class MappersTest { private List vocs() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + Objects + .requireNonNull(MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"))); } private List synonyms() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + Objects + .requireNonNull( + MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..b65bd9fd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -28,7 +28,12 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) @@ -78,6 +83,23 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid()); + 
assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); + assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); + + assertEquals("National", ds.getJurisdiction().getClassid()); + assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); + + assertTrue(ds.getThematic()); + assertTrue(ds.getKnowledgegraph()); + + assertEquals(1, ds.getContentpolicies().size()); + assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); + assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + } @Test @@ -119,7 +141,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); - List alternativenames = getValueAsList("alternativenames", fields); + final List alternativenames = getValueAsList("alternativenames", fields); assertEquals(2, alternativenames.size()); assertTrue(alternativenames.contains("Pippo")); assertTrue(alternativenames.contains("Foo")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java index fb2c90e5c..3b9616de3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java @@ -26,8 +26,6 @@ import io.fares.junit.mongodb.MongoForAllExtension; @Disabled public class MigrateMongoMdstoresApplicationTest { - private static final Logger log = LoggerFactory.getLogger(MigrateMongoMdstoresApplicationTest.class); - public static final String COLL_NAME = "9eed8a4d-bb41-47c3-987f-9d06aee0dec0::1453898911558"; @RegisterExtension diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index 3fd365416..c9c32edd9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -75,14 +75,14 @@ public class PatchRelationApplicationTest { } @Test - public void testPatchRelationApplication() throws Exception { + void testPatchRelationApplication() throws Exception { final String graphBasePath = workingDir.toString() + "/graphBasePath"; PatchRelationsApplication.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphBasePath", graphBasePath, - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + "-workingDir", workingDir + "/workingDir", + "-idMappingPath", workingDir + "/" + ID_MAPPING_PATH }); final List rels = spark diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java index 110fabf45..ec059ad73 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java @@ -9,7 +9,7 @@ import java.util.List; import org.junit.jupiter.api.Test; -public class ReflectionTest { +class ReflectionTest { private final Cleaner cleaner = new Cleaner(); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index 894c0a795..87279eb21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -1,9 +1,13 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF +import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo +import eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -47,6 +51,8 @@ class BioScholixTest extends AbstractVocabularyTest{ } + + @Test def testEBIData() = { val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString @@ -64,6 +70,9 @@ class BioScholixTest extends AbstractVocabularyTest{ assertEquals(10, r.size) assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) println(mapper.writeValueAsString(r.head)) + + + } @@ -179,13 +188,6 @@ class BioScholixTest extends AbstractVocabularyTest{ val result:List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) assertTrue(result.nonEmpty) - - - - - - - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index aaac2b7ee..5b7fbe1cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, Ser import eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary +import eu.dnetlib.dhp.sx.graph.SparkResolveRelation import eu.dnetlib.dhp.sx.graph.bio.pubmed.AbstractVocabularyTest import org.json4s import org.json4s.DefaultFormats @@ -30,6 +31,16 @@ class ScholixGraphTest extends AbstractVocabularyTest{ } + @Test + def testExtractPids():Unit = { + + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString + val res 
=SparkResolveRelation.extractPidsFromRecord(input) + assertNotNull(res) + assertTrue(res._2.size == 2) + + } + @Test def testOAFToSummary():Unit= { val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")).mkString diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json new file mode 100644 index 000000000..97764de00 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -0,0 +1,10 @@ +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 729296522..79dc7cd2d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1231,4 +1231,14 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査 dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り) dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り) dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り) -dnet:review_levels @=@ 0001 @=@ 査読論文 \ No newline at end of file +dnet:review_levels @=@ 0001 @=@ 査読論文 +dnet:relation_relClass @=@ Cites @=@ cites +dnet:relation_relClass @=@ IsCitedBy @=@ isCitedBy +dnet:relation_relClass @=@ 
HasPart @=@ hasPart +dnet:relation_relClass @=@ IsPartOf @=@ isPartOf +dnet:relation_relClass @=@ IsReviewedBy @=@ isReviewedBy +dnet:relation_relClass @=@ Reviews @=@ reviews +dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo +dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy +dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo +dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c..bb1e5fbf9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1079,4 +1079,41 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID @=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed -dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed \ No newline at end of file +dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Cites @=@ Cites +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCitedBy @=@ IsCitedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasPart @=@ HasPart +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPartOf @=@ IsPartOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsReviewedBy @=@ IsReviewedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Reviews @=@ Reviews +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementTo @=@ IsSupplementTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementedBy @=@ IsSupplementedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsRelatedTo @=@ IsRelatedTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Compiles @=@ Compiles +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Continues @=@ Continues +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Documents @=@ Documents +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasAmongTopNSimilarDocuments @=@ HasAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasVersion @=@ HasVersion +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsAmongTopNSimilarDocuments @=@ IsAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCompiledBy @=@ IsCompiledBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsContinuedBy @=@ IsContinuedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDerivedFrom @=@ IsDerivedFrom +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDocumentedBy @=@ IsDocumentedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsNewVersionOf @=@ IsNewVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsObsoletedBy @=@ IsObsoletedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsOriginalFormOf @=@ IsOriginalFormOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPreviousVersionOf @=@ IsPreviousVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSourceOf @=@ IsSourceOf +dnet:relation_relClass 
@=@ dnet:relation_relClass @=@ IsVariantFormOf @=@ IsVariantFormOf +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ affiliation @=@ affiliation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ citation @=@ citation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ dedup @=@ dedup +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ outcome @=@ outcome +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ part @=@ part +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ participation @=@ participation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ provision @=@ provision +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relationship +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json new file mode 100644 index 000000000..5eb41bff5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -0,0 +1,10 @@ +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"UNKNOWN","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. 
Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal 
articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"native","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public 
Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information 
Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and 
Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence 
data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI 
URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, 
Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem new file mode 100644 index 000000000..093c57a9c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} 
+{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json new file mode 100644 index 000000000..fd51cc2f2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2077-3757","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem new file mode 100644 index 000000000..effd0dd60 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem @@ -0,0 +1,25 @@ +{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true} +{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true} +{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true} +{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json new file mode 100644 index 000000000..9cec80eb4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -0,0 +1,25 @@ +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} +{"journalTitle":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} +{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} +{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} +{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} +{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} +{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} +{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} +{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} +{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json new file mode 100644 index 000000000..6e4c11f49 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json @@ -0,0 +1,17 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json new file mode 100644 index 000000000..188380bc6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json @@ -0,0 +1,20 @@ +{"0001-396X":{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false}} +{"0015-7899":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":false}} +{"1434-0860":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true}} +{"0022-0116":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"1573-3564":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"0022-0493":{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false}} +{"0022-4715":{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false}} +{"1543-8325":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0023-7639":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0033-3298":{"id":"10|issn___print::91899e3872351895467856daeb798f63","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false}} +{"0033-569X":{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false}} +{"0034-6691":{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false}} +{"0035-1776":{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false}} +{"0037-699X":{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true}} +{"0037-9697":{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false}} +{"0045-6713":{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false}} +{"0047-4800":{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false}} +{"0068-1024":{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young 
University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false}} +{"0083-6656":{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false}} +{"0090-502X":{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json new file mode 100644 index 000000000..889daae47 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"1434-0860","name":"Forschung im Ingenieurwesen","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land 
Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json new file mode 100644 index 000000000..b84f4eb86 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} 
+{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication new file mode 100644 index 000000000..e67c05cd0 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1728-5852","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 new file mode 100644 index 000000000..e11311d0a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json new file mode 100644 index 000000000..e037c1858 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -0,0 +1 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json new file mode 100644 index 000000000..846be762a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -0,0 +1,3 @@ +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json new file mode 100644 index 000000000..8f6842bf3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -0,0 +1,13 @@ +{"author":[{"fullname":"Shu, L.","name":"L.","pid":[],"rank":1,"surname":"Shu"},{"fullname":"Zhang, Y.","name":"Y.","pid":[],"rank":2,"surname":"Zhang"},{"fullname":"Chen, X.","name":"X.","pid":[],"rank":3,"surname":"Chen"},{"fullname":"Wang, S.","name":"S.","pid":[],"rank":4,"surname":"Wang"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"dateofcollection":"2021-07-10T12:27:03.175Z","dateoftransformation":"2021-07-10T12:38:22.255Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s11036-015-0594-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813784431,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-02-26T11:19:47Z","harvestDate":"2021-07-10T12:27:03.175Z","identifier":"oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Shu , L , Zhang , Y , Chen , X & Wang , S 2015 , ' Editorial for Special Issue on Industrial Networks and Intelligent Systems ' , Mobile Networks and Applications , vol. 20 , no. 2 , pp. 121-123 . https://doi.org/10.1007/s11036-015-0594-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial for Special Issue on Industrial Networks and Intelligent Systems"}]} +{"author":[{"fullname":"Hemmilä, Kari","name":"Kari","pid":[],"rank":1,"surname":"Hemmilä"},{"fullname":"Saarni, Risto","name":"Risto","pid":[],"rank":2,"surname":"Saarni"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"dateofcollection":"2021-07-10T12:31:21.927Z","dateoftransformation":"2021-07-10T13:17:02.345Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Millainen on onnistunut ikkunaremontti? Mikä on asukkaan kannalta paras vaihtoehto? Mitä seikkoja ikkunaremontissa on erityisesti otettava huomioon? Kirjan tekijät diplomi-insinööri Kari Hemmilä ja tekniikan lisensiaatti Risto Saarni ovat olleet mukana monissa ikkunoita ja niiden remontointia koskevissa tutkimusprojekteissa. Näiden lisäksi kirja perustuu lukuisista ikkunaremonteista saatuihin kokemuksiin, remonttien lähtökohtiin, toteutukseen sekä toimivuuden ja käyttökokemusten seurantaan. Onnistunut ikkunaremontti on sellainen, johon ollaan tyytyväisiä vielä vuosien päästä. Sen perustana on perehtyminen nykytilaan, huolellinen toteutus ja kunnossapito, joita tässä kirjassa tuodaan esille monesta näkökulmasta."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813851542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-08-11T09:10:31Z","harvestDate":"2021-07-10T12:31:21.927Z","identifier":"oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Rakennustieto oy"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hemmilä , K & Saarni , R 2002 , Ikkunaremontti . 
Rakennustieto oy , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ikkunaremontti"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Window renovation"}]} +{"author":[{"fullname":"Pavlic, Davor","name":"Davor","pid":[],"rank":1,"surname":"Pavlic"},{"fullname":"Pulkkinen, Antti","name":"Antti","pid":[],"rank":2,"surname":"Pulkkinen"},{"fullname":"Riitahuhta, Asko","name":"Asko","pid":[],"rank":3,"surname":"Riitahuhta"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-07-10T12:34:48.501Z","dateoftransformation":"2021-07-10T13:26:46.605Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly 
magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813869068,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:03:33Z","harvestDate":"2021-07-10T12:34:48.501Z","identifier":"oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Design Society"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pavlic , D , Pulkkinen , A & Riitahuhta , A 2006 , A conceptual framework of product family architecture . in DS 47: Proceedings of NordDesign 2006 Conference, Rejkjavik, Iceland, 16.-18.08.2006 . Design Society , pp. 212-222 . 
< https://www.designsociety.org/publication/24280/A+Conceptual+Framework+of+Product+Family+Architecture >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A conceptual framework of product family architecture"}]} +{"author":[{"fullname":"Carpen, Leena","name":"Leena","pid":[],"rank":1,"surname":"Carpen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.079Z","dateoftransformation":"2021-07-10T13:36:20.24Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f27e75d-365e-4810-be48-ef60378d8279"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813880572,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-05-17T10:37:19Z","harvestDate":"2021-07-10T12:27:29.079Z","identifier":"oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Carpen , L 1999 , ' Case: microbial induced corrosion in paper machines ' , RenhetsTeknik , no. 2 , 16 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Case: microbial induced corrosion in paper machines"}]} +{"author":[{"fullname":"Tuomola, Tuomas","name":"Tuomas","pid":[],"rank":1,"surname":"Tuomola"},{"fullname":"Siitonen, Veijo","name":"Veijo","pid":[],"rank":2,"surname":"Siitonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"dateofcollection":"2021-07-10T12:30:02.598Z","dateoftransformation":"2021-07-10T13:50:33.549Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/56ed9ab2-3b3c-4829-a63b-52311969dd30"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813901871,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-27T09:19:12Z","harvestDate":"2021-07-10T12:30:02.598Z","identifier":"oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tuomola , T & Siitonen , V 1970 
, Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968 . Valtion teknillinen tutkimuslaitos: Lämpöteknillinen laboratorio. Tiedonanto , no. 4 , VTT Technical Research Centre of Finland , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968"}]} +{"author":[{"fullname":"Paakkari, Jussi","name":"Jussi","pid":[],"rank":1,"surname":"Paakkari"},{"fullname":"Ailisto, Heikki","name":"Heikki","pid":[],"rank":2,"surname":"Ailisto"},{"fullname":"Niskala, Matti","name":"Matti","pid":[],"rank":3,"surname":"Niskala"},{"fullname":"Mäkäräinen, Masa","name":"Masa","pid":[],"rank":4,"surname":"Mäkäräinen"},{"fullname":"Väinämö, Kauko","name":"Kauko","pid":[],"rank":5,"surname":"Väinämö"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.082Z","dateoftransformation":"2021-07-10T13:56:12.148Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly 
magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/33cedb13-48f3-4326-b5c3-18d248374378"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813911302,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-29T02:07:50Z","harvestDate":"2021-07-10T12:27:29.082Z","identifier":"oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Research and Development Centre of Kajaani"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Paakkari , J , Ailisto , H , Niskala , M , Mäkäräinen , M & Väinämö , K 1999 , Machine vision guided waterjet cutting . in 3rd International Symposium on Optics in Engineering. Kajaani, FI, 19 - 21 Jan. 1999 . 
Research and Development Centre of Kajaani , Kajaani ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Machine vision guided waterjet cutting"}]} +{"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . 
European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} +{"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . 
https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} +{"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. 
The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . 
< https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. 
Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; 
Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 
25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, 
Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. 
Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} +{"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} +{"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem 
italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv new file mode 100644 index 000000000..eb5d93451 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv @@ -0,0 +1,37 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de 
Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json new file mode 100644 index 000000000..c4ac62ff5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -0,0 +1,29 @@ +{"issn":"2502-731X","issnL":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","titleSource":"ROAD"} +{"issn":"2502-7409","issnL":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","titleSource":"ROAD"} +{"issn":"2502-7433","issnL":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","titleSource":"ROAD"} +{"issn":"2502-745X","issnL":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","titleSource":"ROAD"} +{"issn":"2502-7549","issnL":"2502-7549","title":"ELang journal (An English Education journal)","titleSource":"ROAD"} +{"issn":"2423-3633","issnL":"2423-3625","title":"̒Ulūm-i darmāngāhī-i 
dāmpizishkī-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-5563","issnL":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","titleSource":"ROAD"} +{"issn":"1735-434X","issnL":"1735-434X","title":"Iranian journal of animal biosystematics.","titleSource":"ROAD"} +{"issn":"2423-4435","issnL":"2008-6113","title":"Majallah-i jangal-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-4575","issnL":"2423-4575","title":"Ābziyān-i zinatī.","titleSource":"ROAD"} +{"issn":"2423-4974","issnL":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","titleSource":"ROAD"} +{"issn":"2380-0607","issnL":"2380-0607","title":"AIHM journal club.","titleSource":"ROAD"} +{"issn":"1085-4568","issnL":"1085-4568","title":"Frontiers.","titleSource":"ROAD"} +{"issn":"2380-8845","issnL":"2380-8845","title":"˜The œjournal of contemporary archival studies.","titleSource":"ROAD"} +{"issn":"2381-1803","issnL":"2381-1803","title":"International journal of complementary & alternative medicine.","titleSource":"ROAD"} +{"issn":"2381-2478","issnL":"2381-2478","title":"Palapala.","titleSource":"ROAD"} +{"issn":"2382-5170","issnL":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","titleSource":"ROAD"} +{"issn":"2382-9737","issnL":"2382-9737","title":"Majallah-i salāmat va bihdāsht","titleSource":"ROAD"} +{"issn":"2382-977X","issnL":"2382-977X","title":"UCT journal of research in science ,engineering and technology","titleSource":"ROAD"} +{"issn":"2382-9974","issnL":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","titleSource":"ROAD"} +{"issn":"2227-4782","issnL":"2227-4782","title":"Problemi endokrinnoï patologìï.","titleSource":"ROAD"} +{"issn":"2685-0079","issnL":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","titleSource":"ROAD"} +{"issn":"2574-0075","issnL":"2574-0075","title":"Hypermedia magazine.","titleSource":"ROAD"} +{"issn":"2574-0296","issnL":"2574-0296","title":"˜The œmuseum review.","titleSource":"ROAD"} +{"issn":"2574-0334","issnL":"2574-0334","title":"Bioactive compounds in health and disease.","titleSource":"ROAD"} +{"issn":"2574-108X","issnL":"2574-108X","title":"Journal of computer science integration.","titleSource":"ROAD"} +{"issn":"2574-254X","issnL":"2574-254X","title":"Child and adolescent obesity.","titleSource":"ROAD"} +{"issn":"2574-3325","issnL":"2574-3325","title":"Journal of research on the college president.","titleSource":"ROAD"} +{"issn":"2239-6101","issnL":"2239-5938","title":"European journal of sustainable development.","titleSource":"ROAD"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem new file mode 100644 index 000000000..403ffdf5d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem @@ -0,0 +1,29 @@ +{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true} +{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true} +{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true} +{"id":"unibi","officialname":"Jurnal 
Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true} +{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true} +{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true} +{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true} +{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true} +{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true} +{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true} +{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true} +{"id":"unibi","officialname":"˜The œjournal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true} +{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true} +{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true} +{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true} +{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true} +{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true} +{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true} +{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true} +{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true} +{"id":"unibi","officialname":"˜The œmuseum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true} +{"id":"unibi","officialname":"Bioactive compounds in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true} +{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true} +{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true} +{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true} +{"id":"unibi","officialname":"European journal of sustainable 
development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index befa722e1..42b140306 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -222,6 +222,11 @@ "type": "string", "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, + { + "field": "datasourcetypeui", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + }, { "field": "provenanceaction", "type": "not_used", @@ -241,5 +246,27 @@ "field": "issnLinking", "type": "string", "value": "2579-5447" + }, + { + "field": "jurisdiction", + "type": "string", + "value": "National@@@eosc:jurisdictions" + }, + { + "field": "thematic", + "type": "boolean", + "value": true + }, + { + "field": "knowledgegraph", + "type": "boolean", + "value": true + }, + { + "field": "contentpolicies", + "type": "array", + "value": [ + "Journal article@@@eosc:contentpolicies" + ] } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml new file mode 100644 index 000000000..c9cafea4f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml @@ -0,0 +1,40 @@ + + +
+ r3c4b2081b22::0001f1a60b1acbb28dd66b4f6c7d881f + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + 2021-06-24T10:40:25.346Z + r3c4b2081b22 + 2021-06-24T10:40:55.153Z +
+ + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + Progress report on preparing Strategic plan for Technology transfer from EPPL/FMPI CU + OPEN + + Documents, reports + 0034 + Progress report on preparation process of the technology transfer plan for CU. + corda__h2020::692335 + + + + + file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fopendata%2Fcordis-h2020projectDeliverables.tsv + + + + + + + false + false + 0.9 + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml new file mode 100644 index 000000000..b970605a6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059&test=yes + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059&test=yes + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml new file mode 100644 index 000000000..cead018d1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml @@ -0,0 +1,61 @@ + + +
+ oai:e-book.fwf.ac.at:o:116 + 2020-04-08T13:25:21.742Z + text + 2021-08-12T00:16:55.365Z +
+ + + + 978-3-205-77704-5 + https://e-book.fwf.ac.at/o:116 + + + Although small and not particularly peoples both Chechens and Palestinians became famous for suicide bomber attacks in recent years. This can - partly - be explained by the unrecognised collective traumas of the past. Both Chechens and Palestinians experienced collective traumas in the 1940ties. The entire Chechen population wad deported by Josef Stalin to Kasakhstan, Kirgysia and Sibiria in February 1944 under the pretext of collaboration with the Third Reich. Those who survived were allowed to return in 1957 to Chechenya. Half of the Palestinian Arab population was expelled from Palestine in 1947/48, when fighting erupted between Jews and Arabs. The refugees were never allowed to return. The memory of the deportation/expulsion was kept alive. The founding traumas contributed to the development of Chechen and Palestinian nationalism. Chechens and Palestinians till today suffer from their collective traumas, which stayed unrecognised and therefore create psychological and political problems for the following generations - and for their adverseries. The phenomenon of the "closed circle of violence" created a phobic collective behaviour, which led for example Chechens to the illusionary declaration of independence in 1991. It also led to the individual overreaction of young Chechens or Palestinians, who became living bombs. The collective Trauma, if untreated, poses a threat to any peaceful political solution. + 1. Einleitung Ausgehend von der Fragestellung, warum gerade bei Tschetschenen und Palästinensern der Selbstmordterrorismus in den letzten Jahren so populär geworden ist, analysiert die Autorin die Geschichte dieser beiden Völker. Einer der Gründe ist bisher wenig beachtet worden. Der Einfluss eines kollektiven Traumas, das als solches nicht anerkannt, behandelt und auch nicht einer politischen Lösung zugeführt wurde. 2. Geschichte der Palästinenser und Tschetschenen Im Zuge der Errichtung Israels im Unabhängigkeitskrieg 1948 verlor die Hälfte des palästinensischen Volkes - 750.000 Menschen - ihre Heimat. Unter der Führung von Jassir Arafat kämpften sie in den Jahrzehnten danach - mit Gewalt und am Verhandlungstisch - um einen eigenen Staat. Das Recht auf Rückkehr spielte dabei immer eine besondere Rolle. Die "Nakbah", die als Katastrophe empfundene Vertreibung 1948, wurde dabei Bezugs- und Angelpunkt mehrer Generationen von Flüchtlingen. Die Weigerung Israels, die Mitverantwortung für die Vertreibung der Palästinenser zu übernehmen und das kollektive Trauma der Palästinenser anzuerkennen - aus Angst vor einer Infragestellung des eigenen Staates - ist einer der Gründe, warum der Nahostkonflikt bisher nicht gelöst werden konnte. Auch die Tschetschenen durften jahrzehntelang über die Deportation ihres Volkes nicht einmal sprechen. Hatte Josef Stallin sie erst unter dem Vorwand der Kollaboration mit Nazi-Deutschland deportiert, waren sie zwar nach seinem Tod in die Heimat zurückgekehrt, lebten dort aber jahrzehntelang weiterhin als "unzuverlässiges Volk". Das kollektive Trauma der Deportation konnte nur mündlich überliefert werden. Mit dem Zusammenbruch der Sowjetunion brach der ungelöste Konflikt zwischen Tschetschenien und Russland sofort auf, das Land ging in blutigen Kriegen unter. 3. Zusammenfassung Die kollektive Erinnerung ist in den vergangenen Jahrzehnten zu einem zentralen Forschungsthema geworden. 
Der vorsichtige Einsatz von in der Individualpsychologie gewonnenen Erkenntnissen in der Behandlung von kollektiven Traumata, um zu einer politischen Lösung zu kommen, ist eine Chance. Das Studium historischer Fakten in Kombination mit den Erkenntnissen der Psychologie und Psychiatrie bietet die Basis für eine politische Lösung. Die vorliegende Arbeit zeigt, dass kollektive Traumata, die nicht behandelt werden, immer wieder, auch Generationen später, zu kollektiven Reaktionen führen können, die auf den ersten Blick irrational erscheinen. Die vielleicht radikalste Form des politischen Widerstandes, das Selbstmordattentat, ist dafür ein Beispiel. + deu/ger + Böhlau + application/pdf + + Trauma und Terror: Zum palästinensischen und tschetschenischen Nationalismus + + + + Szyszkowitz, Tessa + Tessa + Szyszkowitz + + + + Trauma, Terror, Palestinians, Suicide attacks, Recognition, Chechens + ÖFOS 2002, Contemporary history + BIC Standard Subject Categories, Postwar 20th century history, from c 1945 to c 2000 (HBLW3) + ÖFOS 2002, Zeitgeschichte + + + 14.02 MB + + + 2007 + + + literature + 2007-01-01 + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + deu/ger + fwf_________::D 3929 + + + https://fedora.e-book.fwf.ac.at/fedora/get/o:116/bdef:Content/download + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result new file mode 100644 index 000000000..98a0841c4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result @@ -0,0 +1,1433 @@ +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0025.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0044.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0075.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0094.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0125.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0144.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0175.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0194.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0225.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0244.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0275.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0294.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0325.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0344.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0375.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0394.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0425.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0433.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0434.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0435.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0436.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0437.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0438.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0439.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0440.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0441.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0442.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0443.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0444.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0445.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0446.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0447.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0448.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0449.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0450.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0451.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0452.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0453.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0454.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0455.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0456.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0457.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0458.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0459.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0460.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0461.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0462.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0463.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0464.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0465.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0466.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0467.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0468.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0469.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0470.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0471.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0472.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0473.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0474.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0475.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0476.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0477.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0478.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0479.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0480.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0481.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0482.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0483.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0484.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0485.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0486.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0487.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0488.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0489.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0490.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0491.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0492.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0493.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0494.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0495.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0496.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0497.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0498.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0499.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0500.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0501.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0502.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0503.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0504.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0505.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0506.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0507.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0508.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0509.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0510.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0511.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0512.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0513.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0514.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0515.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0516.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0517.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0518.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0519.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0520.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0521.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0522.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0523.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0524.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0525.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0526.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0527.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0528.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0529.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0530.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0531.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0532.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0533.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0534.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0535.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0536.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0537.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0538.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0539.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0540.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0541.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0542.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0543.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0544.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0545.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0546.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0547.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0548.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0549.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0550.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0551.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0552.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0553.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0554.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0555.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0556.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0557.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0558.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0559.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0560.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0561.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0562.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0563.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0564.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0565.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0566.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0567.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0568.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0569.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0570.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0571.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0572.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0573.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0574.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0575.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0576.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0577.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0578.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0579.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0580.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0581.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0582.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0583.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0584.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0585.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0586.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0587.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0588.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0589.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0590.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0591.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0592.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0593.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0594.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0595.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0596.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0597.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0598.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0599.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0600.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0601.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0602.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0603.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0604.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0605.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0606.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0607.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0608.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0609.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0610.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0611.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0612.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0613.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0614.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0615.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0616.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0617.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0618.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0619.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0620.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0621.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0622.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0623.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0624.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0625.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0626.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0627.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0628.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0629.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0630.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0631.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0632.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0633.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0634.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0635.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0636.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0637.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0638.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0639.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0640.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0641.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0642.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0643.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0644.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0645.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0646.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0647.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0648.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0649.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0650.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0651.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0652.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0653.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0654.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0655.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0656.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0657.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0658.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0659.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0660.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0661.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0662.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0663.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0664.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0665.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0666.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0667.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0668.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0669.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0670.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0671.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0672.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0673.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0674.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0675.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0676.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0677.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0678.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0679.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0680.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0681.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0682.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0683.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0684.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0685.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0686.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0687.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0688.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0689.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0690.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0691.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0692.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0693.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0694.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0695.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0696.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0697.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0698.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0699.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0700.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0701.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0702.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0703.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0704.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0705.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0706.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0707.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0708.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0709.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0710.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0711.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0712.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0713.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0714.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0715.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0716.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0717.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0718.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0719.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0720.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0721.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0722.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0723.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0724.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0725.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0726.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0727.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0728.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0729.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0730.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0731.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0732.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0733.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0734.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0735.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0736.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0737.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0738.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0739.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0740.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0741.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0742.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0743.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0744.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0745.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0746.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0747.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0748.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0749.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0750.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0751.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0752.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0753.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0754.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0755.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0756.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0757.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0758.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0759.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0760.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0761.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0762.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0763.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0764.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0765.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0766.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0767.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0768.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0769.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0770.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0771.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0772.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0773.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0774.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0775.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0776.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0777.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0778.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0779.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0780.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0781.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0782.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0783.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0784.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0785.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0786.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0787.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0788.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0789.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0790.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0791.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0792.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0793.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0794.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0795.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0796.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0797.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0798.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0799.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0800.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0801.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0802.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0803.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0804.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0805.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0806.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0807.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0808.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0809.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0810.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0811.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0812.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0813.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0814.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0815.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0816.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0817.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0818.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0819.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0820.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0821.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0822.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0823.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0824.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0825.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0826.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0827.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0828.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0829.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0830.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0831.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0832.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0833.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0834.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0835.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0836.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0837.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0838.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0839.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0840.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0841.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0842.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0843.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0844.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0845.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0846.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0847.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0848.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0849.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0850.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0851.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0852.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0853.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0854.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0855.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0856.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0857.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0858.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0859.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0860.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0861.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0862.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0863.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0864.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0865.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0866.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0867.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0868.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0869.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0870.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0871.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0872.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0873.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0874.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0875.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0876.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0877.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0878.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0879.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0880.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0881.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0882.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0883.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0884.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0885.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0886.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0887.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0888.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0889.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0890.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0891.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0892.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0893.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0894.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0895.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0896.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0897.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0898.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0899.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0900.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0901.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0902.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0903.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0904.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0905.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0906.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0907.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0908.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0909.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0910.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0911.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0912.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0913.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0914.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0915.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0916.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0917.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0918.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0919.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0920.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0921.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0922.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0923.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0924.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0925.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0926.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0927.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0928.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0929.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0930.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0931.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0932.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0933.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0934.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0935.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0936.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0937.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0938.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0939.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0940.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0941.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0942.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0943.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0944.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0945.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0946.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0947.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0948.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0949.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0950.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0951.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0952.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0953.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0954.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0955.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0956.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0957.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0958.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0959.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0960.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0961.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0962.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0963.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0964.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0965.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0966.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0967.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0968.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0969.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0970.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0971.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0972.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0973.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0974.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0975.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0976.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0977.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0978.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0979.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0980.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0981.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0982.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0983.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0984.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0985.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0986.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0987.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0988.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0989.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0990.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0991.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0992.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0993.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0994.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0995.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0996.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0997.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0998.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0999.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1000.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1025.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1044.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1075.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1094.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1125.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1144.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1175.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1194.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1225.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1244.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1275.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1294.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1325.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1344.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1375.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1394.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1425.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1433.xml.gz \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json new file mode 100644 index 000000000..f14d5fedf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json @@ -0,0 +1,508 @@ +{ + "collectedfrom": [ + { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": 
"dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": null, + "id": "50|pmid________::cd23b96c02d937c971c1b56d6aa0bf4f", + "originalId": [ + "10025635" + ], + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofcollection": null, + "dateoftransformation": null, + "extraInfo": null, + "oaiprovenance": null, + "measures": null, + "author": [ + { + "fullname": "D J, Marcellin-Little", + "name": "D J", + "surname": "Marcellin-Little", + "rank": 1, + "pid": null, + "affiliation": null + }, + { + "fullname": "B A, DeYoung", + "name": "B A", + "surname": "DeYoung", + "rank": 2, + "pid": null, + "affiliation": null + }, + { + "fullname": "D H, Doyens", + "name": "D H", + "surname": "Doyens", + "rank": 3, + "pid": null, + "affiliation": null + }, + { + "fullname": "D J, DeYoung", + "name": "D J", + "surname": "DeYoung", + "rank": 4, + "pid": null, + "affiliation": null + } + ], + "resulttype": { + "classid": "dataset", + "classname": "dataset", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": null, + "country": null, + "subject": [ + { + "value": "Animals", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Arthroplasty, Replacement, Hip", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Dogs", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Follow-Up Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": 
false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Joint", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Prosthesis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Osseointegration", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Prospective Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Radiography", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Survival Analysis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + 
}, + { + "value": "Treatment Outcome", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "title": [ + { + "value": "Canine uncemented porous-coated anatomic total hip arthroplasty: results of a long-term prospective evaluation of 50 consecutive cases.", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "relevantdate": null, + "description": [ + { + "value": "To evaluate the long-term clinical and radiographic results of a canine uncemented porous-coated anatomic (PCA) total hip arthroplasty (THA).Prospective study of consecutive clinical patients using survival analysis.Forty-one dogs that underwent PCA THA; nine had bilateral PCA THA (50 prostheses).Gait observation, orthopedic examination, and radiographic assessment were conducted before THA, 6 months after THA, and yearly thereafter. A zonal analysis system was used to document osseous changes in the femur and the acetabulum. Acetabular cup and femoral stem subsidence and migration, femoral canal fill, and implant orientation were measured. Survival analysis of the procedure was conducted.Long-term follow-up was available for 37 dogs (46 prostheses). The median follow-up was 63 months. Limb function was normal for 37 limbs and abnormal for 9 limbs because of dislocation (n = 3), lumbosacral disease (n = 2), degenerative myelopathy (n = 1), autoimmune disease (n = 1), brain tumor (n = 1), or osteosarcoma of the femur (n = 1). All prosthetic stems and cups were fixed by bone ingrowth fixation. Osteolysis was not observed. Bone infarction occurred in five femoral canals (four dogs). 
The 6-year survival rate for the procedure was 87% (95% confidence interval, 72%-96%).Long-term fixation of the uncemented PCA acetabular cup and stem is successful in dogs, and long-term clinical function is excellent.", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "publisher": null, + "embargoenddate": null, + "source": null, + "fulltext": null, + "format": null, + "contributor": null, + "resourcetype": null, + "coverage": null, + "bestaccessright": null, + "context": null, + "externalReference": null, + "instance": [ + { + "license": null, + "accessright": null, + "instancetype": { + "classid": "0037", + "classname": "Clinical Trial", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "hostedby": null, + "url": [ + "https://pubmed.ncbi.nlm.nih.gov/10025635" + ], + "distributionlocation": null, + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + }, + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "alternateIdentifier": [ + { + "value": "10.1053/jvet.1999.0010", + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargeamount": null, + "processingchargecurrency": null, + "refereed": null + } + ], + "storagedate": null, + "device": null, + "size": null, + "version": null, + "lastmetadataupdate": null, + "metadataversionnumber": null, + "geolocation": null +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..a33a45517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** - * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type - * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join - * (R.target = T_i.id) save the tuples (R_i, T_i) + * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity + * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i) */ public class CreateRelatedEntitiesJob_phase1 { @@ -42,69 +48,75 @@ public class CreateRelatedEntitiesJob_phase1 { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { - String jsonConfiguration = IOUtils + final String jsonConfiguration = IOUtils .toString( PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputRelationsPath = parser.get("inputRelationsPath"); + final String inputRelationsPath = parser.get("inputRelationsPath"); log.info("inputRelationsPath: {}", inputRelationsPath); - String inputEntityPath = parser.get("inputEntityPath"); + final String inputEntityPath = parser.get("inputEntityPath"); log.info("inputEntityPath: {}", inputEntityPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String graphTableClassName = parser.get("graphTableClassName"); + final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + final Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); + final 
SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); + }); } private static void joinRelationEntity( - SparkSession spark, - String inputRelationsPath, - String inputEntityPath, - Class clazz, - String outputPath) { + final SparkSession spark, + final String inputRelationsPath, + final String inputEntityPath, + final Class clazz, + final String outputPath) { - Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) + final Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) .map( (MapFunction>) r -> new Tuple2<>(r.getTarget(), r), Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) - .cache(); + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + .write() + .mode(SaveMode.Overwrite) + .save("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()); + + final Dataset> entities = spark + .read() + .load("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()) + .as( + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))); relsByTarget .joinWith(entities, entities.col("_1").equalTo(relsByTarget.col("_1")), "inner") @@ -118,7 +130,9 @@ public class CreateRelatedEntitiesJob_phase1 { } private static Dataset readPathEntity( - SparkSession spark, String inputEntityPath, Class entityClazz) { + final SparkSession spark, + final String inputEntityPath, + final Class entityClazz) { log.info("Reading Graph table from: {}", inputEntityPath); return spark @@ -129,13 +143,14 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.bean(entityClazz)); } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { + public static RelatedEntity asRelatedEntity(final E entity, final Class clazz) { final RelatedEntity re = new RelatedEntity(); re.setId(entity.getId()); re.setType(EntityType.fromClass(clazz).name()); - re.setPid(entity.getPid()); + if (entity.getPid() != null) + re.setPid(entity.getPid().stream().limit(400).collect(Collectors.toList())); re.setCollectedfrom(entity.getCollectedfrom()); switch (EntityType.fromClass(clazz)) { @@ -143,7 +158,7 @@ public class CreateRelatedEntitiesJob_phase1 { case dataset: case otherresearchproduct: case software: - Result result = (Result) entity; + final Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { final StructuredProperty title = result.getTitle().stream().findFirst().get(); @@ -170,16 +185,17 @@ public class CreateRelatedEntitiesJob_phase1 { break; case datasource: - Datasource d = (Datasource) entity; + final Datasource d = (Datasource) entity; re.setOfficialname(getValue(d.getOfficialname())); 
re.setWebsiteurl(getValue(d.getWebsiteurl())); re.setDatasourcetype(d.getDatasourcetype()); + re.setDatasourcetypeui(d.getDatasourcetypeui()); re.setOpenairecompatibility(d.getOpenairecompatibility()); break; case organization: - Organization o = (Organization) entity; + final Organization o = (Organization) entity; re.setLegalname(getValue(o.getLegalname())); re.setLegalshortname(getValue(o.getLegalshortname())); @@ -187,14 +203,14 @@ public class CreateRelatedEntitiesJob_phase1 { re.setWebsiteurl(getValue(o.getWebsiteurl())); break; case project: - Project p = (Project) entity; + final Project p = (Project) entity; re.setProjectTitle(getValue(p.getTitle())); re.setCode(getValue(p.getCode())); re.setAcronym(getValue(p.getAcronym())); re.setContracttype(p.getContracttype()); - List<Field<String>> f = p.getFundingtree(); + final List<Field<String>> f = p.getFundingtree(); if (!f.isEmpty()) { re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); } @@ -203,11 +219,11 @@ public class CreateRelatedEntitiesJob_phase1 { return re; } - private static String getValue(Field<String> field) { + private static String getValue(final Field<String> field) { return getFieldValueWithDefault(field, ""); } - private static <T> T getFieldValueWithDefault(Field<T> f, T defaultValue) { + private static <T> T getFieldValueWithDefault(final Field<T> f, final T defaultValue) { return Optional .ofNullable(f) .filter(Objects::nonNull) @@ -216,21 +232,21 @@ public class CreateRelatedEntitiesJob_phase1 { } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, * * @param spark * @param relationPath * @return the Dataset containing all the relationships */ private static Dataset<Relation> readPathRelation( - SparkSession spark, final String relationPath) { + final SparkSession spark, + final String relationPath) { log.info("Reading relations from: {}", relationPath); return spark.read().load(relationPath).as(Encoders.bean(Relation.class)); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index c013a2bf6..85fb4a6b2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -49,9 +49,11 @@ public class CreateRelatedEntitiesJob_phase2 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index b3f785492..fdf397ad7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,14 +131,15 @@ public class PrepareRelationsJob { Set<String> relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD<Relation> rels = readPathRelationRDD(spark, inputRelationsPath) - .filter(rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); + .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved"))) + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); JavaRDD<Relation> pruned = pruneRels( pruneRels( rels, - sourceMaxRelations, relPartitions, (Function<Relation, String>) r -> r.getSource()), - targetMaxRelations, relPartitions, (Function<Relation, String>) r -> r.getTarget()); + sourceMaxRelations, relPartitions, (Function<Relation, String>) Relation::getSource), + targetMaxRelations, relPartitions, (Function<Relation, String>) Relation::getTarget); spark .createDataset(pruned.rdd(), Encoders.bean(Relation.class)) .repartition(relPartitions) @@ -170,8 +171,8 @@ public class PrepareRelationsJob { .map( (MapFunction<String, Relation>) s -> OBJECT_MAPPER.readValue(s, Relation.class), Encoders.kryo(Relation.class)) - .filter((FilterFunction<Relation>) rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter((FilterFunction<Relation>) rel -> relationFilter.contains(rel.getRelClass()) == false) + .filter((FilterFunction<Relation>) rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter((FilterFunction<Relation>) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction<Relation, String>) Relation::getSource, Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 28c1111d6..01d161b6b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.oa.provision; public class ProvisionConstants { + private ProvisionConstants() { + } + public static final String LAYOUT = "index"; public static final String INTERPRETATION = "openaire"; public static final String SEPARATOR = "-"; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 410aff5ba..0033978bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -17,7 +17,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class SolrAdminApplication implements Closeable { diff --git
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..518f41120 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -42,61 +41,51 @@ public class XmlConverterJob { public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( XmlConverterJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); + final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String isLookupUrl = parser.get("isLookupUrl"); + final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - String otherDsTypeId = parser.get("otherDsTypeId"); - log.info("otherDsTypeId: {}", otherDsTypeId); - - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - convertToXml( - spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + }); } private static void convertToXml( - SparkSession spark, - String inputPath, - String outputPath, - ContextMapper contextMapper, - String otherDsTypeId) { + final SparkSession spark, + final String inputPath, + final String outputPath, + final ContextMapper contextMapper) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, - otherDsTypeId); + schemaLocation); final List paths = HdfsSupport .listFiles(inputPath, spark.sparkContext().hadoopConfiguration()); @@ -116,16 +105,15 @@ public class XmlConverterJob { .mapToPair( (PairFunction, Text, Text>) t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) - .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, 
Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static Map prepareAccumulators(SparkContext sc) { - Map accumulators = Maps.newHashMap(); + private static Map prepareAccumulators(final SparkContext sc) { + final Map accumulators = Maps.newHashMap(); accumulators .put( "resultResult_similarity_isAmongTopNSimilarDocuments", @@ -136,15 +124,13 @@ public class XmlConverterJob { sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments")); accumulators .put( - "resultResult_supplement_isSupplementTo", - sc.longAccumulator("resultResult_supplement_isSupplementTo")); + "resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo")); accumulators .put( "resultResult_supplement_isSupplementedBy", sc.longAccumulator("resultResult_supplement_isSupplementedBy")); accumulators - .put( - "resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); + .put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges")); accumulators @@ -152,16 +138,11 @@ public class XmlConverterJob { "resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo")); accumulators - .put( - "resultResult_relationship_isRelatedTo", - sc.longAccumulator("resultResult_relationship_isRelatedTo")); + .put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo")); accumulators - .put( - "resultProject_outcome_isProducedBy", - sc.longAccumulator("resultProject_outcome_isProducedBy")); + .put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy")); accumulators - .put( - "resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); + .put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "resultOrganization_affiliation_isAuthorInstitutionOf", @@ -184,9 +165,7 @@ public class XmlConverterJob { "organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn")); accumulators - .put( - "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + .put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index a321bdba9..e7dbdbd2b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -3,14 +3,12 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; -import javax.swing.text.html.Option; 
import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamResult; @@ -163,7 +161,7 @@ public class XmlIndexingJob { case HDFS: spark .createDataset( - docs.map(s -> new SerializableSolrInputDocument(s)).rdd(), + docs.map(SerializableSolrInputDocument::new).rdd(), Encoders.kryo(SerializableSolrInputDocument.class)) .write() .mode(SaveMode.Overwrite) @@ -174,14 +172,13 @@ public class XmlIndexingJob { } } - protected static String toIndexRecord(Transformer tr, final String record) { + protected static String toIndexRecord(Transformer tr, final String xmlRecord) { final StreamResult res = new StreamResult(new StringWriter()); try { - tr.transform(new StreamSource(new StringReader(record)), res); + tr.transform(new StreamSource(new StringReader(xmlRecord)), res); return res.getWriter().toString(); - } catch (Throwable e) { - log.error("XPathException on record: \n {}", record, e); - throw new IllegalArgumentException(e); + } catch (TransformerException e) { + throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e); } } @@ -192,8 +189,6 @@ public class XmlIndexingJob { * @param xslt xslt for building the index record transformer * @param fields the list of fields * @return the javax.xml.transform.Transformer - * @throws ISLookUpException could happen - * @throws IOException could happen * @throws TransformerException could happen */ protected static String getLayoutTransformer(String format, String fields, String xslt) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index 2eb9cf38b..0fb109fbb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.provision.model; import java.io.Serializable; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index c09ed86e5..d4ee24c14 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -11,6 +11,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class ProvisionModelSupport { + private ProvisionModelSupport() { + } + public static Class[] getModelClasses() { List> modelClasses = Lists.newArrayList(ModelSupport.getOafModelClasses()); modelClasses diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java index e15ceff76..5c78d1826 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java @@ -34,7 +34,6 @@ public class RelatedEntity implements Serializable { private Qualifier datasourcetype; private Qualifier datasourcetypeui; private 
Qualifier openairecompatibility; - // private String aggregatortype; // organization private String legalname; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java index 463c15e9e..cf441a517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java @@ -18,17 +18,18 @@ public class SortableRelationKey implements Comparable, Ser static { weights.put(ModelConstants.PARTICIPATION, 0); - weights.put(ModelConstants.OUTCOME, 1); weights.put(ModelConstants.AFFILIATION, 2); weights.put(ModelConstants.DEDUP, 3); weights.put(ModelConstants.PUBLICATION_DATASET, 4); - weights.put(ModelConstants.CITATION, 5); - weights.put(ModelConstants.SUPPLEMENT, 6); - weights.put(ModelConstants.REVIEW, 7); - weights.put(ModelConstants.RELATIONSHIP, 8); + weights.put(ModelConstants.SUPPLEMENT, 5); + weights.put(ModelConstants.REVIEW, 6); + weights.put(ModelConstants.RELATIONSHIP, 7); + weights.put(ModelConstants.PART, 8); weights.put(ModelConstants.PROVISION, 9); - weights.put(ModelConstants.SIMILARITY, 10); + weights.put(ModelConstants.VERSION, 10); + weights.put(ModelConstants.SIMILARITY, 11); + weights.put(ModelConstants.CITATION, 12); } private static final long serialVersionUID = 3232323; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java index 7391569ed..a91050403 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -5,7 +5,6 @@ import java.util.Comparator; import java.util.Optional; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index ac418f2b9..bcaf40603 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -9,6 +9,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -23,7 +24,7 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; public static ContextMapper fromIS(final String isLookupUrl) - throws DocumentException, ISLookUpException { + throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); StringBuilder sb = new StringBuilder(""); 
Joiner.on("").appendTo(sb, isLookUp.quickSearchProfile(XQUERY)); @@ -31,10 +32,12 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } - public static ContextMapper fromXml(final String xml) throws DocumentException { + public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); for (Object o : doc.selectNodes("//entry")) { Node node = (Node) o; String id = node.valueOf("./@id"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index d2131ef28..3750d0173 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -8,16 +8,18 @@ import java.util.Set; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets + public static final Set authorPidTypes = Sets .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); + private GraphMappingUtils() { + } + public static String removePrefix(final String s) { if (s.contains("|")) return substringAfter(s, "|"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java index 29a51cb29..8c7c61361 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java @@ -25,11 +25,9 @@ public class ISLookupClient { * * @param format the Metadata format name * @return the string representation of the list of fields to be indexed - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ - public String getLayoutSource(final String format) - throws ISLookUpDocumentNotFoundException, ISLookUpException { + public String getLayoutSource(final String format) throws ISLookUpException { return doLookup( String .format( @@ -41,7 +39,6 @@ public class ISLookupClient { * Method retrieves from the information system the openaireLayoutToRecordStylesheet * * @return the string representation of the XSLT contained in the transformation rule profile - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ public String getLayoutTransformer() throws ISLookUpException { @@ -78,9 +75,9 @@ public class ISLookupClient { } private String doLookup(String xquery) throws ISLookUpException { - log.info(String.format("running xquery: %s", xquery)); + log.info("running xquery: {}", xquery); final String res = getIsLookup().getResourceProfileByQuery(xquery); - log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ...")); + log.info("got response (100 chars): {} ...", 
StringUtils.left(res, 100)); return res; } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java deleted file mode 100644 index 9dbac1936..000000000 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.oa.provision.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. - return lClass.compareTo(rClass); - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index f16ee260f..36028be9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -3,10 +3,10 @@ package eu.dnetlib.dhp.oa.provision.utils; import java.io.StringReader; import java.io.StringWriter; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Objects; import javax.xml.stream.*; import javax.xml.stream.events.Namespace; @@ -57,13 +57,13 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; private final ThreadLocal inputFactory = ThreadLocal - .withInitial(() -> XMLInputFactory.newInstance()); + .withInitial(XMLInputFactory::newInstance); private final ThreadLocal outputFactory = ThreadLocal - .withInitial(() -> XMLOutputFactory.newInstance()); + .withInitial(XMLOutputFactory::newInstance); private final ThreadLocal eventFactory = ThreadLocal - .withInitial(() -> XMLEventFactory.newInstance()); + .withInitial(XMLEventFactory::newInstance); private final String version; @@ -126,6 +126,8 @@ public class StreamingInputDocumentFactory { return indexDocument; } catch (XMLStreamException e) { throw new IllegalStateException(e); + } finally { + inputFactory.remove(); } } @@ -143,9 +145,9 @@ public class StreamingInputDocumentFactory { /** * Parse the targetFields block and add 
fields to the solr document. * - * @param indexDocument - * @param parser - * @throws XMLStreamException + * @param indexDocument the document being populated + * @param parser the XML parser + * @throws XMLStreamException when the parser cannot parse the XML */ protected void parseTargetFields( final SolrInputDocument indexDocument, final XMLEventReader parser) @@ -165,9 +167,10 @@ public class StreamingInputDocumentFactory { final XMLEvent text = parser.nextEvent(); String data = getText(text); - - addField(indexDocument, fieldName, data); - hasFields = true; + if (Objects.nonNull(data)) { + addField(indexDocument, fieldName, data); + hasFields = true; + } } } @@ -192,36 +195,43 @@ public class StreamingInputDocumentFactory { final List nsList, final String dnetResult) throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + final XMLEventFactory xmlEventFactory = this.eventFactory.get(); + try { - for (Namespace ns : nsList) { - eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); - } - - StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); - - // new root record - writer.add(newRecord); - - // copy the rest as it is - while (parser.hasNext()) { - final XMLEvent resultEvent = parser.nextEvent(); - - // TODO: replace with depth tracking instead of close tag tracking. - if (resultEvent.isEndElement() - && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { - writer.add(eventFactory.get().createEndElement("", null, RESULT)); - break; + for (Namespace ns : nsList) { + xmlEventFactory.createNamespace(ns.getPrefix(), ns.getNamespaceURI()); } - writer.add(resultEvent); + StartElement newRecord = xmlEventFactory.createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(xmlEventFactory.createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } finally { + outputFactory.remove(); + eventFactory.remove(); } - writer.close(); - indexDocument.addField(INDEX_RESULT, results.toString()); } /** - * Helper used to add a field to a solr doc. 
It avoids to add empy fields + * Helper used to add a field to a solr doc, avoids adding empty fields * * @param indexDocument * @param field @@ -231,7 +241,6 @@ public class StreamingInputDocumentFactory { final SolrInputDocument indexDocument, final String field, final String value) { String cleaned = value.trim(); if (!cleaned.isEmpty()) { - // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); indexDocument.addField(field.toLowerCase(), cleaned); } } @@ -243,9 +252,9 @@ public class StreamingInputDocumentFactory { * @return the */ protected final String getText(final XMLEvent text) { - if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + - // text.asEndElement().getName().getLocalPart()); + if (text.isEndElement()) { return ""; + } final String data = text.asCharacters().getData(); if (data != null && data.length() > MAX_FIELD_LENGTH) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 173ba326a..7487f0956 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -50,7 +50,7 @@ public class TemplateFactory { public String getChild(final String name, final String id, final List metadata) { return getTemplate(resources.getChild()) .add("name", name) - .add("hasId", !(id == null)) + .add("hasId", id != null) .add("id", id != null ? escapeXml(removePrefix(id)) : "") .add("metadata", metadata) .render(); @@ -103,7 +103,7 @@ public class TemplateFactory { (webresources != null ? 
webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) - .map(w -> getWebResource(w)) + .map(this::getWebResource) .collect(Collectors.toList())) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 2c8240290..19300d77d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -9,10 +10,20 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; -import javax.xml.transform.*; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; @@ -35,20 +46,38 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.ExternalReference; +import eu.dnetlib.dhp.schema.oaf.ExtraInfo; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { - private final Map accumulators; + /** + * + */ + private static final long serialVersionUID = 2912912999272373172L; - private final Set specialDatasourceTypes; + private final Map accumulators; private final ContextMapper contextMapper; @@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable { public XmlRecordFactory( final 
ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { - this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI); + this(Maps.newHashMap(), contextMapper, indent, schemaLocation); } public XmlRecordFactory( final Map accumulators, final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { this.accumulators = accumulators; this.contextMapper = contextMapper; this.schemaLocation = schemaLocation; - this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI)); this.indent = indent; } @@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); // final OafEntity entity = toOafEntity(je.getEntity()); - OafEntity entity = je.getEntity(); - TemplateFactory templateFactory = new TemplateFactory(); + final OafEntity entity = je.getEntity(); + final TemplateFactory templateFactory = new TemplateFactory(); try { final EntityType type = EntityType.fromClass(entity.getClass()); @@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable { final String body = templateFactory .buildBody( - mainType, - metadata, - relations, - listChildren(entity, je, templateFactory), - listExtraInfo(entity)); + mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); } catch (final Throwable e) { @@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable { default: throw new IllegalArgumentException("invalid type: " + type); } - } catch (IOException e) { + } catch (final IOException e) { throw new IllegalArgumentException(e); } } - private String printXML(String xml, boolean indent) { + private String printXML(final String xml, final boolean indent) { try { final Document doc = new SAXReader().read(new StringReader(xml)); - OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); + final OutputFormat format = indent ? 
OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); - StringWriter sw = new StringWriter(); - XMLWriter writer = new XMLWriter(sw, format); + final StringWriter sw = new StringWriter(); + final XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); } catch (IOException | DocumentException e) { @@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable { } private List metadata( - final EntityType type, final OafEntity entity, final Set contexts) { + final EntityType type, + final OafEntity entity, + final Set contexts) { final List metadata = Lists.newArrayList(); @@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable { .getAuthor() .stream() .filter(Objects::nonNull) - .map( - a -> { - final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) - && isNotBlank(sp.getValue())) - .collect( - Collectors - .toMap( - p -> getAuthorPidType(p.getQualifier().getClassid()), - p -> p, - (p1, p2) -> p1)) - .values() - .stream() - .collect( - Collectors - .groupingBy( - p -> p.getValue(), - Collectors - .mapping( - p -> p, - Collectors.minBy(new AuthorPidTypeComparator())))) - .values() - .stream() - .map(op -> op.get()) - .forEach( - sp -> { - String pidType = getAuthorPidType(sp.getQualifier().getClassid()); - String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - - // ugly hack: some records provide swapped pidtype and pidvalue - if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { - sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); - } else { - if (isNotBlank(pidType)) { - sb - .append( - String - .format( - " %s=\"%s\"", - pidType, - pidValue - .toLowerCase() - .replaceAll("^.*orcid\\.org\\/", ""))); - } - } - }); - } + .map(a -> { + final StringBuilder sb = new StringBuilder("" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); - return sb.toString(); - }) + .append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\""); + } + if (a.getPid() != null) { + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter( + sp -> isNotBlank(sp.getQualifier().getClassid()) + && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p, + (p1, p2) -> p1)) + .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), Collectors + .mapping( + p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) + .forEach(sp -> { + final String pidType = getAuthorPidType(sp.getQualifier().getClassid()); + final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); + + // ugly hack: some records provide swapped pidtype and pidvalue + if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { + sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); + } else { + if (isNotBlank(pidType)) { + sb + .append( + String + .format( + " %s=\"%s\"", pidType, pidValue + .toLowerCase() + .replaceAll("^.*orcid\\.org\\/", ""))); + } + } + }); + } + sb + .append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); + return sb.toString(); + }) .collect(Collectors.toList())); } if (r.getContributor() != null) { @@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", r.getDateofacceptance().getValue())); + 
.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); } if (r.getDescription() != null) { metadata @@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable { } if (r.getEmbargoenddate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); + .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); } if (r.getSubject() != null) { metadata @@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "lastmetadataupdate", d.getLastmetadataupdate().getValue())); + .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); } if (d.getMetadataversionnumber() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "metadataversionnumber", d.getMetadataversionnumber().getValue())); + .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); } if (d.getSize() != null) { metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); } if (d.getStoragedate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); + .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); } if (d.getVersion() != null) { metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); @@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); + .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); } if (s.getProgrammingLanguage() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "programmingLanguage", s.getProgrammingLanguage())); + .mapQualifier("programmingLanguage", s.getProgrammingLanguage())); } break; case datasource: final Datasource ds = (Datasource) entity; if (ds.getDatasourcetype() != null) { - mapDatasourceType(metadata, ds.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype())); + } + if (ds.getDatasourcetypeui() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } if (ds.getOpenairecompatibility() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", ds.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", ds.getOpenairecompatibility())); } if (ds.getOfficialname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); + .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); } if (ds.getEnglishname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); + .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); } if (ds.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); } if (ds.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); } if (ds.getContactemail() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); + 
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); } if (ds.getNamespaceprefix() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "namespaceprefix", ds.getNamespaceprefix().getValue())); + .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); } if (ds.getLatitude() != null) { metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); } if (ds.getLongitude() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); + .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); } if (ds.getDateofvalidation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofvalidation", ds.getDateofvalidation().getValue())); + .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); } if (ds.getDescription() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); + .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } if (ds.getOdnumberofitems() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitems", ds.getOdnumberofitems().getValue())); + .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); } if (ds.getOdnumberofitemsdate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); + .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); } if (ds.getOdpolicies() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); + .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); } if (ds.getOdlanguages() != null) { metadata @@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "releasestartdate", ds.getReleaseenddate().getValue())); + .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); } if (ds.getReleaseenddate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "releaseenddate", ds.getReleaseenddate().getValue())); + .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); } if (ds.getMissionstatementurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "missionstatementurl", ds.getMissionstatementurl().getValue())); + .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); } if (ds.getDataprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dataprovider", ds.getDataprovider().getValue().toString())); + .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); } if (ds.getServiceprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "serviceprovider", ds.getServiceprovider().getValue().toString())); + .asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString())); } if (ds.getDatabaseaccesstype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "databaseaccesstype", ds.getDatabaseaccesstype().getValue())); + .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); } if (ds.getDatauploadtype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadtype", ds.getDatauploadtype().getValue())); + .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); } 
if (ds.getDatabaseaccessrestriction() != null) { metadata @@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadrestriction", ds.getDatauploadrestriction().getValue())); + .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); } if (ds.getVersioning() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "versioning", ds.getVersioning().getValue().toString())); + .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } if (ds.getCitationguidelineurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "citationguidelineurl", ds.getCitationguidelineurl().getValue())); + .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } if (ds.getQualitymanagementkind() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "qualitymanagementkind", ds.getQualitymanagementkind().getValue())); + .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); } if (ds.getPidsystems() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); + .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); } if (ds.getCertificates() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); + .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); } if (ds.getPolicies() != null) { metadata @@ -749,6 +735,30 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } + if (ds.getJurisdiction() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); + } + + if (ds.getThematic() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); + } + + if (ds.getKnowledgegraph() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); + } + + if (ds.getContentpolicies() != null) { + metadata + .addAll( + ds + .getContentpolicies() + .stream() + .filter(Objects::nonNull) + .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) + .collect(Collectors.toList())); + } + break; case organization: final Organization o = (Organization) entity; @@ -757,13 +767,11 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "legalshortname", o.getLegalshortname().getValue())); + .asXmlElement("legalshortname", o.getLegalshortname().getValue())); } if (o.getLegalname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); + .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); } if (o.getAlternativeNames() != null) { metadata @@ -777,8 +785,7 @@ public class XmlRecordFactory implements Serializable { } if (o.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); @@ -786,32 +793,27 @@ public class XmlRecordFactory implements Serializable { if (o.getEclegalbody() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalbody", 
o.getEclegalbody().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); } if (o.getEclegalperson() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); } if (o.getEcnonprofit() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); } if (o.getEcresearchorganization() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecresearchorganization", o.getEcresearchorganization().getValue())); + .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); } if (o.getEchighereducation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "echighereducation", o.getEchighereducation().getValue())); + .asXmlElement("echighereducation", o.getEchighereducation().getValue())); } if (o.getEcinternationalorganizationeurinterests() != null) { metadata @@ -830,20 +832,17 @@ public class XmlRecordFactory implements Serializable { } if (o.getEcenterprise() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); } if (o.getEcsmevalidated() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecsmevalidated", o.getEcsmevalidated().getValue())); + .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); } if (o.getEcnutscode() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); } if (o.getCountry() != null) { metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); @@ -855,8 +854,7 @@ public class XmlRecordFactory implements Serializable { if (p.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); } if (p.getCode() != null) { metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); @@ -869,8 +867,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getStartdate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); + .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); } if (p.getEnddate() != null) { metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); @@ -879,8 +876,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "callidentifier", p.getCallidentifier().getValue())); + .asXmlElement("callidentifier", p.getCallidentifier().getValue())); } if (p.getKeywords() != null) { metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); @@ -890,8 +886,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getEcarticle29_3() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); } if (p.getSubjects() 
!= null) { metadata @@ -923,13 +918,11 @@ public class XmlRecordFactory implements Serializable { } if (p.getTotalcost() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); + .add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); } if (p.getFundedamount() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); + .add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); } if (p.getFundingtree() != null) { metadata @@ -950,28 +943,18 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String getAuthorPidType(String s) { + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) .replaceAll("\\W", "") .replaceAll("\\d", ""); } - private static boolean kvNotBlank(KeyValue kv) { + private static boolean kvNotBlank(final KeyValue kv) { return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); } - private void mapDatasourceType(List metadata, final Qualifier dsType) { - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); - - if (specialDatasourceTypes.contains(dsType.getClassid())) { - dsType.setClassid("other"); - dsType.setClassname("other"); - } - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); - } - - private List mapFields(RelatedEntityWrapper link, Set contexts) { + private List mapFields(final RelatedEntityWrapper link, final Set contexts) { final Relation rel = link.getRelation(); final RelatedEntity re = link.getTarget(); final String targetType = link.getTarget().getType(); @@ -987,16 +970,14 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getDateofacceptance())) { metadata - .add( - XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); + .add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); } if (isNotBlank(re.getPublisher())) { metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); } if (isNotBlank(re.getCodeRepositoryUrl())) { metadata - .add( - XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); + .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); @@ -1026,14 +1007,16 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); + } + if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); } if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", re.getOpenairecompatibility())); } break; case organization: @@ -1042,8 +1025,7 @@ public class XmlRecordFactory implements Serializable { } if 
(isNotBlank(re.getLegalshortname())) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); + .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } if (re.getCountry() != null && !re.getCountry().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); @@ -1085,8 +1067,10 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String mapRelation(Set contexts, TemplateFactory templateFactory, EntityType type, - RelatedEntityWrapper link) { + private String mapRelation(final Set contexts, + final TemplateFactory templateFactory, + final EntityType type, + final RelatedEntityWrapper link) { final Relation rel = link.getRelation(); final String targetType = link.getTarget().getType(); final String scheme = ModelSupport.getScheme(type.toString(), targetType); @@ -1096,8 +1080,9 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type, targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); - if (rel.getValidated() == null) + if (rel.getValidated() == null) { rel.setValidated(false); + } return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), @@ -1105,12 +1090,14 @@ public class XmlRecordFactory implements Serializable { } private List listChildren( - final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { + final OafEntity entity, + final JoinedEntity je, + final TemplateFactory templateFactory) { final EntityType entityType = EntityType.fromClass(je.getEntity().getClass()); final List links = je.getLinks(); - List children = links + final List children = links .stream() .filter(link -> isDuplicate(link)) .map(link -> { @@ -1131,13 +1118,11 @@ public class XmlRecordFactory implements Serializable { if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); + .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields - .add( - XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); @@ -1147,20 +1132,17 @@ public class XmlRecordFactory implements Serializable { fields .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", instance.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); } if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); } if (isNotBlank(instance.getDistributionlocation())) { fields .add( XmlSerializationUtils - .asXmlElement( - "distributionlocation", instance.getDistributionlocation())); + .asXmlElement("distributionlocation", instance.getDistributionlocation())); } if (instance.getPid() != null) { fields @@ -1185,8 +1167,7 @@ public class XmlRecordFactory implements 
Serializable { if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if (instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) { @@ -1208,8 +1189,7 @@ public class XmlRecordFactory implements Serializable { children .add( templateFactory - .getInstance( - instance.getHostedby().getKey(), fields, instance.getUrl())); + .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); } } final List ext = ((Result) entity).getExternalReference(); @@ -1254,11 +1234,11 @@ public class XmlRecordFactory implements Serializable { return children; } - private boolean isDuplicate(RelatedEntityWrapper link) { + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } - private List listExtraInfo(OafEntity entity) { + private List listExtraInfo(final OafEntity entity) { final List extraInfo = entity.getExtraInfo(); return extraInfo != null ? extraInfo @@ -1271,7 +1251,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if ((contextMapper != null) + if (contextMapper != null && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) { @@ -1302,8 +1282,7 @@ public class XmlRecordFactory implements Serializable { if (def.getName().equals("category")) { final String rootId = substringBefore(def.getId(), "::"); document = addContextDef( - document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), - def); + document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def); } if (def.getName().equals("concept")) { @@ -1327,17 +1306,17 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; - } catch (TransformerConfigurationException e) { + } catch (final TransformerConfigurationException e) { throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e); } } private XMLTag addContextDef(final XMLTag tag, final ContextDef def) { tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel()); - if ((def.getType() != null) && !def.getType().isEmpty()) { + if (def.getType() != null && !def.getType().isEmpty()) { tag.addAttribute("type", def.getType()); } return tag; @@ -1374,16 +1353,14 @@ public class XmlRecordFactory implements Serializable { if (level0 != null) { final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); contextMapper - .put( - level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); + .put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); final Node level1 = fundingPath.selectSingleNode("//funding_level_1"); if (level1 == null) { contexts.add(level0Id); } else { final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); contextMapper - .put( - level1Id, new ContextDef(level1Id, 
level1.valueOf("./description"), "concept", "")); + .put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); final Node level2 = fundingPath.selectSingleNode("//funding_level_2"); if (level2 == null) { contexts.add(level1Id); @@ -1391,8 +1368,7 @@ public class XmlRecordFactory implements Serializable { final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); contextMapper .put( - level2Id, - new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); + level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); contexts.add(level2Id); } } @@ -1413,8 +1389,7 @@ public class XmlRecordFactory implements Serializable { funding += getFunderElement(ftree); for (final Object o : Lists - .reverse( - ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { + .reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { final Element e = (Element) o; final String _id = e.valueOf("./id"); funding += "<" diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 8195467b1..213a62b32 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -11,9 +11,12 @@ public class XmlSerializationUtils { // XML 1.0 // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - private static final String xml10pattern = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + private static final String XML_10_PATTERN = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]"; + private XmlSerializationUtils() { + } + public static String mapJournal(Journal j) { final String attrs = new StringBuilder() .append(attr("issn", j.getIssnPrinted())) @@ -50,12 +53,12 @@ public class XmlSerializationUtils { public static String escapeXml(final String value) { return value - .replaceAll("&", "&amp;") - .replaceAll("<", "&lt;") - .replaceAll(">", "&gt;") - .replaceAll("\"", "&quot;") - .replaceAll("'", "&apos;") - .replaceAll(xml10pattern, ""); + .replace("&", "&amp;") + .replace("<", "&lt;") + .replace(">", "&gt;") + .replace("\"", "&quot;") + .replace("'", "&apos;") + .replaceAll(XML_10_PATTERN, ""); } public static String parseDataInfo(final DataInfo dataInfo) { @@ -70,18 +73,6 @@ public class XmlSerializationUtils { .toString(); } - private static StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo info) { - return sb - .append( - attr("inferred", info.getInferred() != null ? info.getInferred().toString() : "")) - .append(attr("inferenceprovenance", info.getInferenceprovenance())) - .append( - attr( - "provenanceaction", - info.getProvenanceaction() != null ?
info.getProvenanceaction().getClassid() : "")) - .append(attr("trust", info.getTrust())); - } - public static String mapKeyValue(final String name, final KeyValue kv) { return new StringBuilder() .append("<") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java index 6cec3ed53..903150ca8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java @@ -25,7 +25,7 @@ public class ZkServers { // quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire String urls = zkUrl; final Optional chRoot = Optional.of(SEPARATOR + StringUtils.substringAfterLast(zkUrl, SEPARATOR)); - if (chRoot.isPresent() && StringUtils.isNotBlank(chRoot.get())) { + if (StringUtils.isNotBlank(chRoot.get())) { log.debug(String.format("found zk chroot %s", chRoot)); urls = zkUrl.replace(chRoot.get(), ""); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index ffeb0995d..e5faccd0f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -45,6 +45,7 @@ public class DropAndCreateESIndex { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final String ip = clusterMap.get(cluster).split(",")[0]; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java index 3f842ef34..dd08215d5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java @@ -44,6 +44,7 @@ public class SparkIndexCollectionOnES { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") final Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index 32720514e..eda6154d7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -16,11 +16,5 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true - }, - { - "paramName": "odt", - "paramLongName": "otherDsTypeId", - "paramDescription": "list of datasource types to populate field datasourcetypeui", - 
"paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9280678c1..9f41805e2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -25,10 +25,6 @@ targetMaxRelations maximum number of relations allowed for a each entity grouping by target - - otherDsTypeId - mapping used to populate datasourceTypeUi field - format metadata format name (DMF|TMF) @@ -582,7 +578,6 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml --isLookupUrl${isLookupUrl} - --otherDsTypeId${otherDsTypeId}
diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..1c7dce3f2 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -49,7 +46,7 @@ public class IndexRecordTransformerTest { @Test public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); testRecordTransformation(record); } @@ -57,14 +54,14 @@ public class IndexRecordTransformerTest { @Test public void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - XmlRecordFactoryTest.otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = load("publication.json", Publication.class); - Project pj = load("project.json", Project.class); - Relation rel = load("relToValidatedProject.json", Relation.class); + final Publication p = load("publication.json", Publication.class); + final Project pj = load("project.json", Project.class); + final Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -72,24 +69,57 @@ public class IndexRecordTransformerTest { new RelatedEntityWrapper(rel, CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class)))); - String record = xmlRecordFactory.build(je); + final String record = xmlRecordFactory.build(je); assertNotNull(record); testRecordTransformation(record); } - private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + @Test + public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml")); + testRecordTransformation(record); + } - String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + @Test + public void 
testForEOSCFutureTraining() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + testRecordTransformation(record); + } - Transformer tr = SaxonTransformerFactory.newInstance(transformer); + @Test + public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); + testRecordTransformation(record); + } - String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + @Test + public void testForEOSCFutureB2SharePlotSw() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-sw.xml")); + testRecordTransformation(record); + } - SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML); + @Test + public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); + testRecordTransformation(record); + } + + + + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); + + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); @@ -97,7 +127,7 @@ public class IndexRecordTransformerTest { System.out.println(xmlDoc); } - private T load(String fileName, Class clazz) throws IOException { + private T load(final String fileName, final Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index 6818cf6a5..c22a24185 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -62,7 +65,7 @@ public class PrepareRelationsJobTest { } @Test - public void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { + void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { final int maxRelations = 20; PrepareRelationsJob @@ -83,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - Assertions.assertEquals(maxRelations, out.count()); + assertEquals(maxRelations, out.count()); Dataset 
freq = out .toDF() @@ -97,13 +100,13 @@ public class PrepareRelationsJobTest { long participation = getRows(freq, PARTICIPATION).get(0).getAs("count"); long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); - Assertions.assertTrue(participation == outcome); - Assertions.assertTrue(outcome > affiliation); - Assertions.assertTrue(participation > affiliation); + assertEquals(outcome, participation); + assertTrue(outcome > affiliation); + assertTrue(participation > affiliation); - Assertions.assertEquals(7, outcome); - Assertions.assertEquals(7, participation); - Assertions.assertEquals(6, affiliation); + assertEquals(7, outcome); + assertEquals(7, participation); + assertEquals(6, affiliation); } protected List getRows(Dataset freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index f57b8dcaf..9d5bff3cf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -1,39 +1,42 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class SolrAdminApplicationTest extends SolrTest { +class SolrAdminApplicationTest extends SolrTest { @Test - public void testPing() throws Exception { + void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assertions.assertTrue(pingResponse.getStatus() == 0); + assertEquals(0, pingResponse.getStatus()); } @Test - public void testAdminApplication_DELETE() throws Exception { + void testAdminApplication_DELETE() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } @Test - public void testAdminApplication_COMMIT() throws Exception { + void testAdminApplication_COMMIT() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 72f28fdf2..dc0a40471 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -28,7 +28,6 @@ public class SortableRelationKeyTest { .map(r -> SortableRelationKey.create(r, r.getSource())) .sorted() .forEach( - it -> { try { 
System.out.println(mapper.writeValueAsString(it)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index d7bcb3185..6f1956578 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -89,7 +89,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_onSolr() throws Exception { + void testXmlIndexingJob_onSolr() throws Exception { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; @@ -112,7 +112,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_saveOnHDFS() throws Exception { + void testXmlIndexingJob_saveOnHDFS() throws Exception { final String ID_XPATH = "//header/*[local-name()='objIdentifier']"; String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 221049f90..2b5e08e92 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -21,37 +22,33 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; -import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { - public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test public void testXMLRecordFactory() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); 
assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); @@ -74,30 +71,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue( - (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date")); @@ -106,59 +102,31 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + 
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = Lists.newArrayList(); + final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); links.add(rew); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je.setLinks(links); - String xml = xmlRecordFactory.build(je); + final String xml = xmlRecordFactory.build(je); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); System.out.println(doc.asXML()); assertEquals("", doc.valueOf("//rel/validated")); } - - @Test - public void testEnermapsRecord() throws IOException, DocumentException { - - String contextmap = "" - + - "" + - "" + - ""; - - ContextMapper contextMapper = ContextMapper.fromXml(contextmap); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); - - Dataset d = OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream("enermaps.json")), Dataset.class); - - JoinedEntity je = new JoinedEntity<>(d); - - String xml = xmlRecordFactory.build(je); - - assertNotNull(xml); - - Document doc = new SAXReader().read(new StringReader(xml)); - assertNotNull(doc); - System.out.println(doc.asXML()); - assertEquals("enermaps::selection::tgs00004", doc.valueOf("//concept/@id")); - } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml new file mode 100644 index 000000000..43b256bbb --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml @@ -0,0 +1,114 @@ + + +
+ r37b0ad08687::a8df7db30ae0e4e0b875a098df7b652f + 2021-10-07T01:56:56Z + under curation + +
+ + + + + Using CAMS European air quality analysis from Copernicus + Atmosphere Monitoring with RELIANCE services + + Simone Mantovani + 2021-10-07 + + + + This notebook shows how to discover and access the Copernicus Atmosphere Monitoring products available in the RELIANCE datacube resources. + The process is structured in 6 steps, including example of data analysis and visualization with the Python libraries installed in the Jupyter environment + + + EOSC Jupyter Notebook + + RELIANCE + + Copernicus + + Air quality + + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:5554786 + + oai:zenodo.org:5554786 + + 10.5281/zenodo.5554786 + + + + false + false + 0.9 + + + + + + + corda__h2020::8771f523c34e38902d4921037d545ef8 + + REsearch LIfecycle mAnagemeNt for Earth Science Communities and CopErnicus users in EOSC + 101017501 + RELIANCE + + + ec__________::EC::H2020 + ec__________::EC::H2020::RIA + + + + + + + + + + + + https://zenodo.org/record/5554786 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml new file mode 100644 index 000000000..3c2c6440f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml @@ -0,0 +1,288 @@ + + +
+ doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127 + 2019-09-19T07:43:31+0000 + 2019-09-19T07:43:31+0000 +
+ + + + + + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + 6a93c069-a167-44cb-bfe8-74c275637347 + 50|r3730f562f9e::9b434fedc00d568b8e00611a7fa19f41 + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + ada23067-496a-494f-bd82-6ffe3cf4f0fb + 50|r3730f562f9e::b9cd774e8126b6902d56f9a4aa03e1dc + f3bd1041-422c-439d-8e68-c1d0711d130d + 50|r3730f562f9e::b847821a0ca5365b0d971dd89dea6bf1 + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + HCG16 L-band VLA C+D array final data + + + Jones, Michael G. + 2019-01-01 + These are the reduced final data associated with the paper Jones et al. 2019 submitted + to Astronomy & Astrophysics. They are used by a mybinder (https://gke.mybinder.org/) + executable environment to generate the final plots of that paper. The link for this environment + is https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. The raw VLA D and C array data of HCG 16 + were collected by the Very Large Array (http://www.vla.nrao.edu/) in 1989 and 1999, under PI + projects of Barbara Williams. The project numbers are AW234 and AW500 respectively. The file + also includes a grz colour image and r-band image from DECaLS DR8 + (http://legacysurvey.org/decamls/), a GBT HI spectrum published in Borthakur et al. 2010 (ApJ + 710, 385), an HI data cube from HIPASS (https://www.atnf.csiro.au/research/multibeam/release/), + and a source mask (and associated parameters file) for the HIPASS cube generated using SoFiA + (https://github.com/SoFiA-Admin/SoFiA-2). + + 3.5.2.1.1 → Observational astronomy → + Radio astronomy + + HI + + VLA + + HCG16 + + Various + + + 2019-01-01 + + https://b2share.eudat.eu + + + + true + false + 0.8 + dedup-similarity-result-decisiontree-v2 + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + Plot scripts for HCG-16 Project + + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + + + + 2019-01-01 + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + https://dx.doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + 2019-01-01 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + https://dx.doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + + 2019-01-01 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + https://dx.doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + 
http://dx.doi.org/https://doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml new file mode 100644 index 000000000..5f44f6b1f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml @@ -0,0 +1,112 @@ + + +
+ userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + 2021-10-07T12:42:54Z + +
+ + + + + Plot scripts for HCG-16 Project + + Jones, Michael G. + Jones, Michael G. + 2021-09-30 + + + These are the notebooks to general the final data plots of the paper Jones et al. 2019 + submitted to Astronomy & Astrophysics. They can be used in a notebooks environment (like + https://notebooks.egi.eu/) with the proper libraries installed. A mybinder + (https://mybinder.org/) + ready version can be started from https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. Data to + generate plots is also available from B2SHARE: + https://b2share.eudat.eu/records/a69a7b2dcc22449e8734552dde4d3906 + + + EOSC Jupyter Notebook + + + B2SHARE + + + + + + + + + + + + + + + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + 10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + false + false + 0.9 + + + + + + doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127 + HCG16 L-band VLA C+D array final data + 2019-01-01 + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + + + + + 2021-09-30 + + http://dx.doi.org/10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml new file mode 100644 index 000000000..6d2ac7630 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml @@ -0,0 +1,71 @@ + + +
+ doi_dedup___::ab57f086011a9ae23d1165211dc6e04b + 2020-11-03T05:39:50+0000 + 2020-11-03T05:39:50+0000 +
+ + + + EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml + Giuseppe La Rocca + Enol Fernández + Andrea Manzi + 2020-11-03 + + + This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement. + + EOSC Jupyter Notebook + 2020-11-03 + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:4218562 + + oai:zenodo.org:4218562 + 10.5281/zenodo.4195418 + 10.5281/zenodo.4218562 + + false + false + 0.9 + + + + + + + + + + + 2020-11-03 + + https://zenodo.org/record/4218562 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml new file mode 100644 index 000000000..9ab9b9861 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml @@ -0,0 +1,72 @@ + + +
+ doi_dedup___::8539a8de8996e01350f0de8ca4899b7f + 2021-09-22T08:53:13Z +
+ + + + + EGI-Foundation/training-notebooks-seadatanet: Version 0.4 + Enol Fernández + 2019-12-04 + + + A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/3Byz9Cw (run at EGI Notebooks service for easy access to data). This release uses the correct path of the data share from the EGI DataHub. + + EOSC Jupyter Notebook + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:3561323 + + oai:zenodo.org:3561323 + 10.5281/zenodo.3561323 + 10.5281/zenodo.3443996 + 10.5281/zenodo.3475539 + 10.5281/zenodo.3475785 + + + false + false + 0.9 + + + + + + + + + + + + + https://zenodo.org/record/3561323 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index c47975c9d..910a366f6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -15,7 +15,13 @@ - + + + + + + + @@ -28,7 +34,8 @@ - + + @@ -79,6 +86,7 @@ + @@ -105,7 +113,7 @@ - + @@ -130,7 +138,7 @@ - + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh index 6c5823b0c..6d42ab13d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh @@ -16,7 +16,7 @@ curl -L ${CONTEXT_API}/contexts/?type=ri,community -H "accept: application/json" cat contexts.csv | cut -d , -f1 | xargs -I {} curl -L ${CONTEXT_API}/context/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split(":")[0]),\(.id),\(.label)"' > categories.csv cat categories.csv | cut -d , -f2 | sed 's/:/%3A/g'| xargs -I {} curl -L ${CONTEXT_API}/context/category/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split("::")[0])::\(.id|split("::")[1]),\(.id),\(.label)"' > concepts.csv cat contexts.csv | sed 's/^\(.*\),\(.*\)/\1,\1::other,\2/' >> categories.csv -cat categories.csv | grep -v ::other | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv +cat categories.csv | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv echo "uploading context data to hdfs" hdfs dfs -mkdir ${TMP} diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh index fb944f4ff..93faa43d6 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh @@ -15,5 +15,5 @@ hdfs dfs -copyToLocal $SCRIPT_PATH echo "Creating indicators" impala-shell -q "invalidate metadata" impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f - -cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f - +cat step16-createIndicatorsTables.sql | impala-shell -d $TARGET -f - echo "Indicators created" \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh similarity index 58% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh index ff03bca03..db8d39af2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh @@ -9,16 +9,9 
@@ fi export SOURCE=$1 export TARGET=$2 export SHADOW=$3 -export SCRIPT_PATH=$4 -echo "Getting file from " $4 -hdfs dfs -copyToLocal $4 - -echo "Creating observatory database" -impala-shell -q "drop database if exists ${TARGET} cascade" -impala-shell -q "create database if not exists ${TARGET}" -impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - -cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - +impala-shell -q "invalidate metadata;" +impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - echo "Impala shell finished" echo "Updating shadow observatory database" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh new file mode 100644 index 000000000..55a308c50 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh @@ -0,0 +1,16 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! [ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export TARGET=$2 +export SHADOW=$3 + +echo "Creating observatory database" +impala-shell -q "drop database if exists ${TARGET} cascade" +impala-shell -q "create database if not exists ${TARGET}" +impala-shell -d ${SOURCE} -q "show tables" --delimited | grep -iv roar | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index 77fbd3b18..fc0162a9c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -23,6 +23,11 @@ CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS SELECT * FROM ${external_stats_db_name}.rndexpediture; +CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS +SELECT * +FROM ${external_stats_db_name}.licenses_normalized; + + ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ -- Creation date of the database diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql similarity index 100% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql similarity index 83% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 020787039..0ea4a5adc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -233,4 +233,50 @@ on p.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract -from publication; \ No newline at end of file +from publication; + +create table indi_with_orcid stored as parquet as +select distinct r.id, coalesce(has_orcid, 0) as has_orcid +from result r +left outer join (select id, 1 as has_orcid from result_orcid) tmp +on r.id= tmp.id + +create table indi_funded_result_with_fundref stored as parquet as +select distinct r.id, coalesce(fundref, 0) as fundref +from project_results r +left outer join (select distinct id, 1 as fundref from project_results +where provenance='Harvested') tmp +on r.id= tmp.id + +create table indi_result_org_country_collab stored as parquet as +with tmp as +(select o.id as id, o.country , ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country + +create table indi_result_org_collab stored as parquet as +with tmp as +(select o.id, ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id) +select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id +group by o1.id, o2.id, o1.type + +create table indi_result_org_country_collab stored as parquet as +with tmp as +(select o.id as id, o.country , ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql deleted file mode 100644 index 481fd9e8c..000000000 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql +++ /dev/null @@ -1,62 +0,0 @@ ----------------------------------------------------- --- Shortcuts for various definitions in stats db --- ----------------------------------------------------- - --- Peer reviewed: --- Results that have been collected from Crossref -create table 
${stats_db_name}.result_peerreviewed as -with peer_reviewed as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_sources rs on rs.id=r.id - join ${stats_db_name}.datasource d on d.id=rs.datasource - where d.name='Crossref') -select distinct peer_reviewed.id as id, true as peer_reviewed -from peer_reviewed -union all -select distinct r.id as id, false as peer_reviewed -from ${stats_db_name}.result r -left outer join peer_reviewed pr on pr.id=r.id -where pr.id is null; - --- Green OA: --- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal. -create table ${stats_db_name}.result_greenoa as -with result_green as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - left outer join ( - select rd.id from ${stats_db_name}.result_datasources rd - join ${stats_db_name}.datasource d on rd.datasource=d.id - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where sd.name='DOAJ-ARTICLES' - ) as doaj on doaj.id=r.id - where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null) -select distinct result_green.id, true as green -from result_green -union all -select distinct r.id as id, false as green -from ${stats_db_name}.result r -left outer join result_green rg on rg.id=r.id -where rg.id is null; - --- GOLD OA: --- OA results that have been harvested from a DOAJ journal. -create table ${stats_db_name}.result_gold as -with result_gold as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where r.type='publication' and r.bestlicence='Open Access' and sd.name='DOAJ-Articles') -select distinct result_gold.id, true as gold -from result_gold -union all -select distinct r.id, false as gold -from ${stats_db_name}.result r -where r.id not in (select id from result_gold); \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql new file mode 100644 index 000000000..6b4d9b1b0 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -0,0 +1,22 @@ +---------------------------------------------------- +-- Shortcuts for various definitions in stats db --- +---------------------------------------------------- + +-- Peer reviewed: +create table ${stats_db_name}.result_peerreviewed as +select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed +from ${stats_db_name}.result r +left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id +left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; + +-- Green OA: +create table ${stats_db_name}.result_greenoa as +select r.id, case when green.green_oa=1 then true else false end as green +from ${stats_db_name}.result r +left outer join ${stats_db_name}.indi_pub_green_oa 
green on green.id=r.id; + +-- GOLD OA: +create table ${stats_db_name}.result_gold as +select r.id, case when gold.gold_oa=1 then true else false end as gold +from ${stats_db_name}.result r + left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 5da028304..9ea50d488 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -104,25 +104,42 @@ create table TARGET.project_results as select id as result, project as id from T compute stats TARGET.project_results; -- indicators -create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_green_oa; - -create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_grey_lit; - -create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_doi_from_crossref; - -create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_gold_oa; - +create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig; +create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig; create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; + +create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig; +create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig; +create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig; + create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; + +create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig; +create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig; create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; +create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_green_oa; +create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from 
TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_grey_lit; +create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_doi_from_crossref; +create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_gold_oa; +create table TARGET.indi_pub_has_abstract as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_abstract; +create table TARGET.indi_pub_has_cc_licence as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_cc_licence; +create table TARGET.indi_pub_has_cc_licence_url as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_cc_licence_url; + +create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig; +create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig; +create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig; + --denorm alter table TARGET.result rename to TARGET.res_tmp; diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index 40cdf3f6d..e24370e7d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,259 +1,561 @@ -create table TARGET.result_affiliated_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name; +create table ${observatory_db_name}.result_cc_licence stored as parquet as +select r.id, coalesce(rln.count, 0) > 0 as cc_licence +from ${stats_db_name}.result r + left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from ${stats_db_name}.result_licenses rl + left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id; -create table TARGET.result_affiliated_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false 
end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year; +create table ${observatory_db_name}.result_affiliated_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; -create table TARGET.result_affiliated_year_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name; +create table ${observatory_db_name}.result_affiliated_year stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year +from ${stats_db_name}.result r + join 
${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -create table TARGET.result_affiliated_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name; +create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -create table TARGET.result_affiliated_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when 
pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name; +create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id + left outer join ${stats_db_name}.datasource d on d.id=rd.datasource + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -create table TARGET.result_affiliated_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name; +create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end 
as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id + left outer join ${stats_db_name}.datasource d on d.id=rd.datasource + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -create table TARGET.result_affiliated_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name; +create table ${observatory_db_name}.result_affiliated_organization stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') 
then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -create table TARGET.result_affiliated_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder; +create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -create table TARGET.result_affiliated_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name; +create table 
${observatory_db_name}.result_affiliated_funder stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -create table TARGET.result_deposited_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name; +create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join 
${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -create table TARGET.result_deposited_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year; +create table ${observatory_db_name}.result_deposited_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; -create table TARGET.result_deposited_year_country stored as parquet as 
-select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name; +create table ${observatory_db_name}.result_deposited_year stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -create table TARGET.result_deposited_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join 
SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name; +create table ${observatory_db_name}.result_deposited_year_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -create table TARGET.result_deposited_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name; +create table ${observatory_db_name}.result_deposited_datasource stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname +from ${stats_db_name}.result r + 
join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -create table TARGET.result_deposited_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name; +create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on 
rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -create table TARGET.result_deposited_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name; +create table ${observatory_db_name}.result_deposited_organization stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -create table TARGET.result_deposited_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder -from SOURCE.result r 
-join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder; +create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -create table TARGET.result_deposited_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname -from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, 
r.peer_reviewed, r.type, p.funder, c.code, c.name; +create table ${observatory_db_name}.result_deposited_funder stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -compute stats TARGET.result_affiliated_country; -compute stats TARGET.result_affiliated_year; -compute stats TARGET.result_affiliated_year_country; -compute stats TARGET.result_affiliated_datasource; -compute stats TARGET.result_affiliated_datasource_country; -compute stats TARGET.result_affiliated_organization; -compute stats TARGET.result_affiliated_organization_country; -compute stats TARGET.result_affiliated_funder; -compute stats TARGET.result_affiliated_funder_country; -compute stats TARGET.result_deposited_country; -compute stats TARGET.result_deposited_year; -compute stats TARGET.result_deposited_year_country; -compute stats TARGET.result_deposited_datasource; -compute stats TARGET.result_deposited_datasource_country; -compute stats TARGET.result_deposited_organization; -compute stats TARGET.result_deposited_organization_country; -compute stats TARGET.result_deposited_funder; -compute stats TARGET.result_deposited_funder_country; +create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on 
rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index a329ca4bf..08d33f4e8 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -239,14 +239,51 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - +
- + ${hive_jdbc_url} - + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + + + + + + + + ${jobTracker} + ${nameNode} + contexts.sh + ${context_api_url} + ${stats_db_name} + contexts.sh + + + + + + + + ${jobTracker} + ${nameNode} + indicators.sh + ${stats_db_name} + ${wf:appPath()}/scripts/step16-createIndicatorsTables.sql + indicators.sh + + + + + + + + ${hive_jdbc_url} + stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} @@ -261,48 +298,11 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - - - - - - - - ${jobTracker} - ${nameNode} - indicators.sh - ${stats_db_name} - ${wf:appPath()}/scripts/step16_7-createIndicatorsTables.sql - indicators.sh - - - - - - - - ${jobTracker} - ${nameNode} - contexts.sh - ${context_api_url} - ${stats_db_name} - contexts.sh - - + - + ${jobTracker} ${nameNode} @@ -326,20 +326,44 @@ ${wf:appPath()}/scripts/step20-createMonitorDB.sql monitor.sh + + + + + + + ${jobTracker} + ${nameNode} + observatory-pre.sh + ${stats_db_name} + ${observatory_db_name} + ${observatory_db_shadow_name} + observatory-pre.sh + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + observatory_db_name=${observatory_db_name} + + + + + + ${jobTracker} ${nameNode} - observatory.sh + observatory-post.sh ${stats_db_name} ${observatory_db_name} ${observatory_db_shadow_name} - ${wf:appPath()}/scripts/step21-createObservatoryDB.sql - observatory.sh + observatory-post.sh diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java index 5b2e6804b..93f1bc087 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java @@ -7,16 +7,8 @@ package eu.dnetlib.oa.graph.usagerawdata.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; -import java.util.Properties; -import org.apache.log4j.Logger; - -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. 
Zoupanos */ @@ -31,7 +23,9 @@ public abstract class ConnectDB { private static String dbImpalaUrl; private static String usageStatsDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -72,10 +66,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -97,10 +87,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java index e53709f1a..afd7f9807 100644 --- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java @@ -7,20 +7,14 @@ package eu.dnetlib.oa.graph.usagestatsbuild.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import java.util.Properties; import org.apache.log4j.Logger; -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. 
Zoupanos */ @@ -37,7 +31,9 @@ public abstract class ConnectDB { private static String usageStatsDBSchema; private static String usagestatsPermanentDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -94,10 +90,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -119,10 +111,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml index 54e76c1e2..b1c51c497 100644 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -8,20 +8,5 @@ 4.0.0 dhp-workflow-profiles - jar - - \ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml new file mode 100644 index 000000000..e4680a0cf --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml @@ -0,0 +1,100 @@ + +
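Note: the ConnectDB changes above drop the commented-out DriverManager code and the unused Logger, leaving c3p0 pooling as the single connection path and adding a private constructor so the utility class cannot be instantiated. A minimal sketch of that pooled-connection pattern, with a placeholder class name, JDBC URL and pool sizes (not the project's real settings):

import java.sql.Connection;
import java.sql.SQLException;

import com.mchange.v2.c3p0.ComboPooledDataSource;

// Sketch of the c3p0 pattern kept by the refactored ConnectDB classes; values are illustrative.
public final class PooledConnectionSketch {

	private PooledConnectionSketch() {
	}

	public static Connection open(String jdbcUrl) throws SQLException {
		ComboPooledDataSource cpds = new ComboPooledDataSource();
		cpds.setJdbcUrl(jdbcUrl);
		cpds.setAcquireIncrement(1); // grow the pool one connection at a time
		cpds.setMinPoolSize(1);
		cpds.setInitialPoolSize(1);
		cpds.setMaxPoolSize(100);
		return cpds.getConnection();
	}
}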
+ + + + + +
+ + Import bipFinder scores + Import bipFinder scores + 30 + + + declares the path holding the BIP SCORE data + + bipScorePath + /data/bip/20201206 + + + + + + + declares the path holding the LATEST GRAPH dump + + latestGraphPath + /tmp/stable_ids/graph/14_graph_blacklisted + + + + + + + prepare action sets + + + [ + { + 'set' : 'bipfinder-scores', + 'jobProperty' : 'export_action_set_bipfinder-scores', + 'enablingProperty' : 'active_bipfinder-scores', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare AS for the bipFinder scores integration + + executeOozieJob + IIS + + { + 'bipScorePath':'bipScorePath', + 'inputPath':'latestGraphPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder' + } + + build-report + + + + + + + update action sets + + + + + + + + + + + + + +
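Note: the bipFinder profile above, like the Datacite, DOIboost, H2020 classification, ORCID no-doi and ROR profiles that follow, pairs a params object mapping Oozie property names to workflow parameters with a second object of fixed Oozie settings. The sketch below only illustrates how such a single-quoted mapping could be resolved into job properties; the class name is made up, the executor that actually performs this step is not part of this patch, and Jackson on the classpath is an assumption.

import java.util.LinkedHashMap;
import java.util.Map;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

// Illustrative only: resolves a profile-style mapping such as
// { 'bipScorePath' : 'bipScorePath', 'inputPath' : 'latestGraphPath', 'outputPath' : 'outputPath' }
// against the workflow environment, yielding the properties handed to the Oozie job.
public class ProfileParamSketch {

	private static final ObjectMapper MAPPER = new ObjectMapper()
		.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);

	public static Map<String, String> resolve(String paramsJson, Map<String, String> env) throws Exception {
		Map<String, String> mapping = MAPPER
			.readValue(paramsJson, new TypeReference<Map<String, String>>() {
			});
		Map<String, String> jobProperties = new LinkedHashMap<>();
		for (Map.Entry<String, String> e : mapping.entrySet()) {
			// key: Oozie property name, value: workflow parameter holding its value
			jobProperties.put(e.getKey(), env.getOrDefault(e.getValue(), ""));
		}
		return jobProperties;
	}
}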
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml new file mode 100644 index 000000000..d2ea9d35f --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml @@ -0,0 +1,144 @@ + +
+ + + + + +
+ + Import Datacite ActionSet + Import InfoSpace + 30 + + + set the resume from + + resumeFrom + TransformDatacite + + + + + + + shall the datacite mapping produce the links? + + exportLinks + false + + + + + + + set the path storing the OAF Datacite records + + oafTargetPath + /data/datacite/production/datacite_oaf + + + + + + + set the input path for Datacite content + + datacitePath + /data/datacite + + + + + + + prepare action sets + + + [ + { + 'set' : 'datacite', + 'jobProperty' : 'export_action_set_datacite', + 'enablingProperty' : 'active_datacite', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'mainPath' : 'datacitePath', + 'oafTargetPath' : 'oafTargetPath', + 'exportLinks' : 'exportLinks', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'sourcePath' : 'oafTargetPath', + 'outputPath' : 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210723_163342_752 + 2021-07-23T16:44:05+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml new file mode 100644 index 000000000..ce9eb8f4c --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml @@ -0,0 +1,200 @@ + +
+ + + + + +
+ + Import DOIboost + Import InfoSpace + 30 + + + set the input path for MAG + + MAGDumpPath + /data/doiboost/mag-2021-02-15 + + + + + + + set the input path for CROSSREF dump + + crossrefDumpPath + /data/doiboost/crossref/ + + + + + + + set the intermediate path used to process MAG + + intermediatePathMAG + /data/doiboost/input/mag + + + + + + + set the input path for Crossref + + inputPathCrossref + /data/doiboost/input/crossref + + + + + + + set the timestamp for the Crossref incremental harvesting + + crossrefTimestamp + 1607614921429 + + + + + + + set the input path for UnpayWall + + inputPathUnpayWall + /data/doiboost/input/unpayWall + + + + + + + set the input path for ORCID + + inputPathOrcid + /data/orcid_activities_2020/last_orcid_dataset + + + + + + + set the working path for ORCID + + workingPathOrcid + /data/doiboost/input/orcid + + + + + + + set the hostedBy map path + + hostedByMapPath + /data/doiboost/input/hostedBy/hbMap.gz + + + + + + + set the oozie workflow name from which the execution will be resumed + + resumeFrom + ConvertCrossrefToOAF + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'doiboost', + 'jobProperty' : 'export_action_set_doiboost', + 'enablingProperty' : 'active_doiboost', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of DOIBoost + + executeOozieJob + IIS + + { + 'crossrefTimestamp' : 'crossrefTimestamp', + 'hostedByMapPath' : 'hostedByMapPath', + 'MAGDumpPath' :'MAGDumpPath', + 'inputPathMAG' : 'intermediatePathMAG', + 'inputPathCrossref' : 'inputPathCrossref', + 'crossrefDumpPath':'crossrefDumpPath', + 'inputPathUnpayWall' : 'inputPathUnpayWall', + 'inputPathOrcid' : 'inputPathOrcid', + 'outputPath' : 'outputPath', + 'workingPathOrcid':'workingPathOrcid', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app', + 'workingPath' : '/data/doiboost/process_p', + 'sparkExecutorCores' : '2', + 'sparkExecutorIntersectionMemory' : '12G', + 'sparkExecutorMemory' : '8G', + 'esServer' : '[es_server]', + 'esIndex' : 'crossref' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210714_075237_381 + 2021-07-14T09:51:46+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml new file mode 100644 index 000000000..6d29e25a1 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml @@ -0,0 +1,132 @@ + +
+ + + + + +
+ + Import H2020classification + Import H2020classification + 30 + + + sets the URL to download the project file + + projectFileURL + https://cordis.europa.eu/data/cordis-h2020projects.csv + + + + + + + sets the URL to download the programme file + + programmeFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv + + + + + + + sets the URL to download the topics file + + topicFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx + + + + + + + sets the name of the sheet in the topic file to be read + + sheetName + Topics + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'h2020classification', + 'jobProperty' : 'export_action_set_h2020classification', + 'enablingProperty' : 'active_h2020classification', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the H2020 Classification + + executeOozieJob + IIS + + { + 'outputPath': 'outputPath', + 'sheetName':'sheetName', + 'projectFileURL' : 'projectFileURL', + 'programmeFileURL' : 'programmeFileURL', + 'topicFileURL':'topicFileURL' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification', + 'postgresURL':'', + 'postgresUser':'', + 'postgresPassword':'' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210524_084803_740 + 2021-05-24T09:05:50+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml new file mode 100644 index 000000000..c5642dadc --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml @@ -0,0 +1,101 @@ + +
+ + + + + +
+ + Import Orcid + Import InfoSpace + 30 + + + set the hdfs input path + + inputPath + /data/orcid_activities_2020 + + + + + + + set the temporary path where to store the action set + + processOutputPath + /tmp/prod_provision/working_path_orcid_activities + + + + + + + prepare action sets + + + [ + { + 'set' : 'orcidworks-no-doi', + 'jobProperty' : 'export_action_set_orcidworks_no_doi', + 'enablingProperty' : 'active_orcidworks_no_doi', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the Orcid No Doi + + executeOozieJob + IIS + + { + 'workingPath' : 'inputPath', + 'processOutputPath' : 'processOutputPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app', + 'spark2GenNoDoiDatasetMaxExecutors' : '200', + 'spark2GenNoDoiDatasetExecutorMemory' : '2G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210713_170819_470 + 2021-07-13T17:28:26+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml new file mode 100644 index 000000000..4810fda3b --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml @@ -0,0 +1,89 @@ + +
+ + + + + +
+ + Update ROR actionset + Import Infospace + 30 + + + Set the base path containing the no_doi_dataset folder + + inputPath + /data/ror/ror-data-2021-04-06.json + + + + + + + prepare action sets + + + [ + { + 'set' : 'ror', + 'jobProperty' : 'export_action_set_ror', + 'enablingProperty' : 'active_ror', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + update the ROR actionset + + executeOozieJob + IIS + + { + 'rorJsonInputPath' : 'inputPath', + 'rorActionSetPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app', + 'workingDir': '/tmp/import_ror_actionset_prod' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210518_143542_478 + 2021-05-18T14:37:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml new file mode 100644 index 000000000..2fed35f44 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -0,0 +1,638 @@ + +
+ + + + + +
+ + Graph construction for IIS [BETA] + IIS + 30 + + + set blacklist of funder nsPrefixes from the beta aggregator + + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_inference/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_inference/graph/02_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_inference/graph/03_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_inference/graph/04_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_inference/graph/05_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_inference/graph/06_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? 
+ + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_inference/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + false + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_inference/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 
'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210730_094240_462 + 2021-07-30T15:04:19+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml new file mode 100644 index 000000000..e5ce3d710 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -0,0 +1,437 @@ + +
+ + + + + +
+ + Graph construction for IIS [PROD NEW] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_inference/graph/00_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_inference/graph/01_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/prod_inference/graph/02_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_inference/graph/03_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_inference/graph/04_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_inference/graph/05_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + should apply the relations id patching based on the provided idMapping? 
+ + shouldPatchRelations + false + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the 
properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210719_165159_86 + 2021-07-19T20:45:09+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml new file mode 100644 index 000000000..126d5f58d --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml @@ -0,0 +1,225 @@ + +
+ + + + + +
+ + IIS main workflow V3 [PROD] + IIS + 30 + + + start + + + + + + + Set a regex of funder shortnames to exclude from the project reference processing + + referenceextraction_project_fundingclass_blacklist_regex + ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$ + + + + + + + prepare action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-06-23 + IIS + /tmp/prod_inference/graph/05_graph_cleaned + 
import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + 'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + false + build-report + + + + + + + update action sets + + + + + + + + wf_20210719_221139_780 + 2021-07-21T01:23:13+00:00 + SUCCESS + + + +
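Note: the IIS profile above excludes a set of funders from project reference extraction via the referenceextraction_project_fundingclass_blacklist_regex parameter. The snippet below only demonstrates how that anchored alternation behaves against funding-class strings; the sample values are illustrative and the filtering itself happens inside IIS, not in this patch.

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

// Illustration of the funding-class blacklist regex declared in the IIS profile above.
public class FundingClassBlacklistSketch {

	public static void main(String[] args) {
		Pattern blacklist = Pattern
			.compile("^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$"
				+ "|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$");

		// sample funding classes, purely illustrative
		List<String> fundingClasses = Arrays.asList("EC::H2020", "DFG::EXC", "WT::WT");

		List<String> kept = fundingClasses
			.stream()
			.filter(fc -> !blacklist.matcher(fc).matches())
			.collect(Collectors.toList());

		System.out.println(kept); // prints [EC::H2020, WT::WT]
	}
}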
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml similarity index 60% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 4eab12c73..f83337b3c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -1,215 +1,196 @@
- + - +
- Graph Construction [PROD] + Graph Construction [BETA] Data Provision 30 - - - reuse cached content from the aggregation system + + set blacklist of funder nsPrefixes from the beta aggregator - reuseContent - true + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ - - - set the aggregator content path + + set blacklist of funder nsPrefixes from the production aggregator - contentPath - /tmp/beta_aggregator + nsPrefixBlacklist_PROD + gsrt________,rcuk________ - - - Set the path containing the AGGREGATOR graph + + set the path of the map defining the relations id mappings - aggregatorGraphPath - /tmp/beta_provision/graph/00_graph_aggregator + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_provision/graph/01_graph_merged - Set the target path to store the RAW graph rawGraphPath - /tmp/beta_provision/graph/01_graph_raw + /tmp/beta_provision/graph/02_graph_raw - - - Set the target path to store the first CLEANED graph + + Set the target path to store the the consistent graph cleaned - firstCleanedGraphPath - /tmp/prod_provision/graph/02_graph_first_cleaned + cleanedFirstGraphPath + /tmp/beta_provision/graph/03_graph_cleaned - Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/beta_provision/graph/03_graph_dedup + /tmp/beta_provision/graph/04_graph_dedup - Set the target path to store the INFERRED graph inferredGraphPath - /tmp/beta_provision/graph/04_graph_inferred + /tmp/beta_provision/graph/05_graph_inferred - Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/beta_provision/graph/05_graph_consistent + /tmp/beta_provision/graph/06_graph_consistent - Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/beta_provision/graph/06_graph_orcid + /tmp/beta_provision/graph/07_graph_orcid - Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/beta_provision/graph/07_graph_bulktagging + /tmp/beta_provision/graph/08_graph_bulktagging - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/beta_provision/graph/08_graph_affiliation + /tmp/beta_provision/graph/09_graph_affiliation - Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/beta_provision/graph/09_graph_comunity_organization + /tmp/beta_provision/graph/10_graph_comunity_organization - Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/beta_provision/graph/10_graph_funding + /tmp/beta_provision/graph/11_graph_funding - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/beta_provision/graph/11_graph_comunity_sem_rel + /tmp/beta_provision/graph/12_graph_comunity_sem_rel - Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/beta_provision/graph/12_graph_country + /tmp/beta_provision/graph/13_graph_country - Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/beta_provision/graph/13_graph_cleaned + /tmp/beta_provision/graph/14_graph_cleaned - Set the target path to store the blacklisted graph blacklistedGraphPath - /tmp/beta_provision/graph/14_graph_blacklisted + /tmp/beta_provision/graph/15_graph_blacklisted - - - Set the lookup address - - isLookUpUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - Set the map of paths for the Bulk Tagging @@ -220,7 +201,6 @@ - Set the map of 
associations organization, community list for the propagation of community to result through organization @@ -233,70 +213,398 @@ - Set the dedup orchestrator name dedupConfig - decisiontree-dedup-test + dedup-similarity-result-decisiontree-v2 - declares the ActionSet ids to promote in the RAW graph actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite - declares the ActionSet ids to promote in the INFERRED graph actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - wait configurations - + + - - + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? 
+ + shouldPatchRelations_PROD + true + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_provision/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? + + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_provision/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 
'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + create the AGGREGATOR graph executeOozieJob IIS { - 'graphOutputPath' : 'aggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseContent', - 'contentPath' : 'contentPath' + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph', + 'priority' : 'BETA' } build-report @@ -305,7 +613,6 @@ - create the RAW graph @@ -314,14 +621,14 @@ { 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'aggregatorGraphPath', + 'inputGraphRootPath' : 'mergedGraphPath', 'outputGraphRootPath' : 'rawGraphPath', 'isLookupUrl' : 'isLookUpUrl' } { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', @@ -343,7 +650,6 @@ - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid @@ -352,14 +658,39 @@ { 'graphInputPath' : 'rawGraphPath', - 'graphOutputPath': 'firstCleanedGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', 'isLookupUrl': 'isLookUpUrl' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/first_clean' + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_provision/working_dir/hostedbymap' } build-report @@ -368,7 +699,6 @@ - search for duplicates in the raw graph @@ -377,15 +707,18 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'firstCleanedGraphPath', + 'graphBasePath' : 'cleanedFirstGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' } build-report @@ -394,7 +727,6 @@ - create the INFERRED graph @@ -410,7 +742,7 @@ { - 'oozie.wf.application.path' : 
'/lib/dnet/actionmanager/wf/main/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', @@ -432,7 +764,6 @@ - mark duplicates as deleted and redistribute the relationships @@ -441,12 +772,12 @@ { 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' + 'graphOutputPath': 'consistentGraphPath' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', 'workingPath' : '/tmp/beta_provision/working_dir/dedup' } @@ -470,10 +801,12 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' } build-report @@ -482,7 +815,6 @@ - mark results respecting some rules as belonging to communities @@ -498,7 +830,7 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' } @@ -508,7 +840,6 @@ - creates relashionships between results and organizations when the organizations are associated to institutional repositories @@ -522,9 +853,10 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', - 'saveGraph' : 'true' + 'saveGraph' : 'true', + 'blacklist' : 'empty' } build-report @@ -533,7 +865,6 @@ - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap @@ -548,7 +879,7 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', 'saveGraph' : 'true' } @@ -559,7 +890,6 @@ - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects @@ -573,9 +903,9 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', 'saveGraph' : 'true' } @@ -585,7 +915,6 @@ - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities @@ -600,9 +929,9 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', 
'saveGraph' : 'true' } @@ -612,7 +941,6 @@ - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from @@ -626,12 +954,12 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'workingDir' : '/tmp/beta_provision/working_dir/country', 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', 'saveGraph' : 'true' } @@ -641,7 +969,6 @@ - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid @@ -656,8 +983,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/clean' + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' } build-report @@ -666,7 +993,6 @@ - removes blacklisted relations @@ -680,10 +1006,10 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', + 'postgresURL' : '', + 'postgresUser' : '', 'postgresPassword' : '' } @@ -693,11 +1019,10 @@ - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 + wf_20210803_134357_367 + 2021-08-03T17:08:11+00:00 SUCCESS diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml similarity index 77% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml index 08ed24cd0..be6155f2f 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -1,248 +1,176 @@
- + - +
- Graph Construction [HYBRID] + Graph construction [PROD NEW] Data Provision 30 - - - reuse cached content from the PROD aggregation system + + set blacklist of funder nsPrefixes - reuseProdContent - true + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ - + - - - set the PROD aggregator content path - - prodContentPath - /tmp/core_aggregator - - - - - - - + Set the path containing the PROD AGGREGATOR graph - prodAggregatorGraphPath - /tmp/core_provision/graph/00_prod_graph_aggregator + aggregatorGraphPath + /tmp/prod_provision/graph/00_prod_graph_aggregator - - - reuse cached content from the BETA aggregation system - - reuseBetaContent - true - - - - - - - - set the BETA aggregator content path - - betaContentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the BETA AGGREGATOR graph - - betaAggregatorGraphPath - /tmp/core_provision/graph/00_beta_graph_aggregator - - - - - - - - Set the IS lookup service address - - isLookUpUrl - http://services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the target path to store the MERGED graph - - mergedGraphPath - /tmp/core_provision/graph/01_graph_merged - - - - - - Set the target path to store the RAW graph rawGraphPath - /tmp/core_provision/graph/02_graph_raw + /tmp/prod_provision/graph/01_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/prod_provision/graph/02_graph_cleaned - Set the target path to store the DEDUPED graph dedupGraphPath - /tmp/core_provision/graph/03_graph_dedup + /tmp/prod_provision/graph/03_graph_dedup - Set the target path to store the INFERRED graph inferredGraphPath - /tmp/core_provision/graph/04_graph_inferred + /tmp/prod_provision/graph/04_graph_inferred - Set the target path to store the CONSISTENCY graph consistentGraphPath - /tmp/core_provision/graph/05_graph_consistent + /tmp/prod_provision/graph/05_graph_consistent - Set the target path to store the ORCID enriched graph orcidGraphPath - /tmp/core_provision/graph/06_graph_orcid + /tmp/prod_provision/graph/06_graph_orcid - Set the target path to store the BULK TAGGED graph bulkTaggingGraphPath - /tmp/core_provision/graph/07_graph_bulktagging + /tmp/prod_provision/graph/07_graph_bulktagging - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph affiliationGraphPath - /tmp/core_provision/graph/08_graph_affiliation + /tmp/prod_provision/graph/08_graph_affiliation - Set the target path to store the COMMUNITY from SELECTED SOURCES graph communityOrganizationGraphPath - /tmp/core_provision/graph/09_graph_comunity_organization + /tmp/prod_provision/graph/09_graph_comunity_organization - Set the target path to store the FUNDING from SEMANTIC RELATION graph fundingGraphPath - /tmp/core_provision/graph/10_graph_funding + /tmp/prod_provision/graph/10_graph_funding - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph communitySemRelGraphPath - /tmp/core_provision/graph/11_graph_comunity_sem_rel + /tmp/prod_provision/graph/11_graph_comunity_sem_rel - Set the target path to store the COUNTRY enriched graph countryGraphPath - /tmp/core_provision/graph/12_graph_country + /tmp/prod_provision/graph/12_graph_country - Set the target path to store the CLEANED graph cleanedGraphPath - /tmp/core_provision/graph/13_graph_cleaned + /tmp/prod_provision/graph/13_graph_cleaned - Set the target path to store the blacklisted graph blacklistedGraphPath - 
/tmp/core_provision/graph/14_graph_blacklisted + /tmp/prod_provision/graph/14_graph_blacklisted - Set the map of paths for the Bulk Tagging @@ -253,7 +181,6 @@ - Set the map of associations organization, community list for the propagation of community to result through organization @@ -266,136 +193,194 @@ - Set the dedup orchestrator name dedupConfig - decisiontree-dedup-test + dedup-similarity-result-decisiontree-v2 - declares the ActionSet ids to promote in the RAW graph actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite - declares the ActionSet ids to promote in the INFERRED graph actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl - wait configurations - - + + + + + + + + + - - - - create the AGGREGATOR graph + + reuse cached ODF claims from the PROD aggregation system - executeOozieJob - IIS - - { - 'graphOutputPath' : 'betaAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseBetaContent', - 'contentPath' : 'betaContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/beta_aggregator' - } - - build-report + reuseODFClaims + true - + - - - create the AGGREGATOR graph + + reuse cached ODF records on HDFS from the PROD aggregation system - executeOozieJob - IIS - - { - 'graphOutputPath' : 'prodAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseProdContent', - 'contentPath' : 'prodContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/prod_aggregator' - } - - build-report + reuseODFhdfs + true - + - - + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + 
+ + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + wait configurations - + - - - create the AGGREGATOR graph + + create the PROD AGGREGATOR graph executeOozieJob IIS { - 'betaInputGgraphPath' : 'betaAggregatorGraphPath', - 'prodInputGgraphPath' : 'prodAggregatorGraphPath', - 'graphOutputPath' : 'mergedGraphPath' + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/merge/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/merge_graph' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' } build-report @@ -404,7 +389,6 @@ - create the RAW graph @@ -413,14 +397,14 @@ { 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'mergedGraphPath', + 'inputGraphRootPath' : 'aggregatorGraphPath', 'outputGraphRootPath' : 'rawGraphPath', 'isLookupUrl' : 'isLookUpUrl' } { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', @@ -433,7 +417,31 @@ 'activePromoteResultActionPayload' : 'true', 'activePromoteSoftwareActionPayload' : 'true', 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsRaw' + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' } build-report @@ -442,7 +450,6 @@ - search for duplicates in the raw graph @@ -451,15 +458,18 @@ { 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'rawGraphPath', + 'graphBasePath' : 'cleanedFirstGraphPath', 'dedupGraphPath': 'dedupGraphPath', 'isLookUpUrl' : 'isLookUpUrl' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' } build-report @@ -468,7 +478,6 @@ - create the INFERRED graph 
@@ -484,7 +493,7 @@ { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', 'activePromoteDatasetActionPayload' : 'true', @@ -497,7 +506,7 @@ 'activePromoteResultActionPayload' : 'true', 'activePromoteSoftwareActionPayload' : 'true', 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsIIS' + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' } build-report @@ -506,7 +515,6 @@ - mark duplicates as deleted and redistribute the relationships @@ -515,13 +523,13 @@ { 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' + 'graphOutputPath': 'consistentGraphPath' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup' } build-report @@ -530,7 +538,6 @@ - propagates ORCID among results linked by allowedsemrels semantic relationships @@ -544,8 +551,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/orcid', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/orcid', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } @@ -556,7 +563,6 @@ - mark results respecting some rules as belonging to communities @@ -572,8 +578,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/bulktag' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/bulktag' } build-report @@ -582,7 +588,6 @@ - creates relashionships between results and organizations when the organizations are associated to institutional repositories @@ -596,9 +601,10 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/affiliation', - 'saveGraph' : 'true' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' } build-report @@ -607,7 +613,6 @@ - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap @@ -622,8 +627,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_organization', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', 'saveGraph' : 'true' } @@ -633,7 +638,6 @@ - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects @@ -647,8 +651,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/funding', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', + 'workingDir' : 
'/tmp/prod_provision/working_dir/funding', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } @@ -659,7 +663,6 @@ - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities @@ -674,8 +677,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_semrel', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', 'saveGraph' : 'true' } @@ -686,7 +689,6 @@ - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from @@ -700,12 +702,12 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app', 'sparkExecutorCores' : '3', 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/core_provision/working_dir/country', + 'workingDir' : '/tmp/prod_provision/working_dir/country', 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', 'saveGraph' : 'true' } @@ -715,7 +717,6 @@ - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid @@ -730,8 +731,8 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/clean' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' } build-report @@ -740,7 +741,6 @@ - removes blacklisted relations @@ -754,10 +754,10 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', 'postgresPassword' : '' } @@ -767,11 +767,10 @@ - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 + wf_20210723_171026_279 + 2021-07-24T00:00:39+00:00 SUCCESS diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml similarity index 86% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml index 0ace12ea3..836e69d6f 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml @@ -4,10 +4,10 @@ - + - Graph to HiveDB [OCEAN] + Graph to HiveDB [PROD] Data Provision 30 @@ -31,12 +31,11 @@ - wait configurations - + @@ -52,7 +51,10 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/hive/oozie_app' + 'oozie.wf.application.path' : 
'/lib/dnet/PROD/oa/graph/hive/oozie_app', + 'sparkDriverMemory' : '4G', + 'sparkExecutorMemory' : '10G', + 'sparkExecutorCores' : '3' } build-report @@ -61,11 +63,10 @@ - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 + wf_20210728_075001_400 + 2021-07-28T08:04:00+00:00 SUCCESS diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml similarity index 82% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml index 8a7738bcf..6cdf41bb6 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml @@ -4,18 +4,18 @@ - + - Update Solr [OCEAN] + Update Solr [PROD] Data Provision 30 - Set the path containing the AGGREGATOR graph + Set the path containing the GRAPH to index inputGraphRootPath - + /tmp/prod_provision/graph/14_graph_blacklisted @@ -25,7 +25,7 @@ Set the target path to store the RAW graph format - TMF + DMF @@ -35,13 +35,12 @@ Set the lookup address isLookupUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl - wait configurations @@ -63,13 +62,16 @@ { - 'oozie.wf.application.path' : '/lib/dnet/oa/provision/oozie_app', - 'maxRelations' : '100', + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app', + 'sourceMaxRelations' : '1000', + 'targetMaxRelations' : '10000000', 'relPartitions' : '3000', 'batchSize' : '2000', - 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments', + 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy', 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', 'resumeFrom' : 'prepare_relations', + 'shouldIndex' : 'true', + 'outputFormat' : 'SOLR', 'sparkDriverMemoryForJoining' : '3G', 'sparkExecutorMemoryForJoining' : '7G', 'sparkExecutorCoresForJoining' : '4', @@ -77,7 +79,7 @@ 'sparkExecutorMemoryForIndexing' : '2G', 'sparkExecutorCoresForIndexing' : '64', 'sparkNetworkTimeout' : '600', - 'workingDir' : '/tmp/beta_provision/working_dir/update_solr' + 'workingDir' : '/tmp/prod_provision/working_dir/update_solr' } build-report @@ -86,11 +88,10 @@ - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 + wf_20210724_062705_620 + 2021-07-25T13:25:37+00:00 SUCCESS diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml similarity index 50% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml index a91b6302e..4dfae3c7d 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml @@ -4,34 +4,53 @@ - + - Update Stats [OCEAN] + Update Stats [PROD] Data Provision 30 - Set the path 
containing the AGGREGATOR graph + Set the OpenAIRE graph DB name openaire_db_name - + openaire_prod_yyyyMMdd - - Set the target path to store the RAW graph + + Set the STATS DB name stats_db_name - + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd - wait configurations @@ -40,20 +59,28 @@ - create the AGGREGATOR graph + update the content in the stats DB executeOozieJob IIS { 'openaire_db_name' : 'openaire_db_name', - 'stats_db_name' : 'stats_db_name' + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' } { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/stats/oozie_app', - 'hive_timeout' : '3000' + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app', + 'hive_timeout' : '15000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_shadow_name' : 'openaire_prod_stats_shadow', + 'external_stats_db_name' : 'stats_ext', + 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow', + 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow', + 'context_api_url' : 'https://services.openaire.eu/openaire' } build-report @@ -62,11 +89,10 @@ - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 + wf_20210725_065608_71 + 2021-07-26T07:35:55+00:00 SUCCESS diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml new file mode 100644 index 000000000..d8def071f --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml @@ -0,0 +1,87 @@ + +
+ + + + + +
+ + Publish Stats [PROD] + Content Publishing + 35 + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + publishes the stats DB to the public schema + + executeOozieJob + IIS + + { + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app', + 'hive_timeout' : '150000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_production_name' : 'openaire_prod_stats', + 'monitor_db_production_name' : 'openaire_prod_stats_monitor', + 'observatory_db_production_name' : 'openaire_prod_stats_observatory' + } + + build-report + + + + + + + + wf_20210727_160728_625 + 2021-07-27T16:53:01+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml new file mode 100644 index 000000000..cf337fd7e --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml @@ -0,0 +1,131 @@ + +
+ + + + + +
+ + Update Broker events [PROD OCEAN] + Data Provision + 30 + + + Set the path containing the GRAPH to scan + + graphInputPath + + + + + + + + Set the datasource Ids Whitelist + + datasourceIdWhitelist + openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68 + + + + + + + Set the datasource type Whitelist + + datasourceTypeWhitelist + pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository + + + + + + + Set the datasource Id Blacklist + + datasourceIdBlacklist + - + + + + + + + Set the TOPIC whitelist (* = all topics) + + topicWhitelist + ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID + + + + + + + Set the output path to store the Event records + + outputDir + /var/lib/dnet/broker_PROD/events + + + + + + + wait configurations + + + + + + + update the BROKER events + + executeOozieJob + IIS + + { + 'graphInputPath' : 'graphInputPath', + 'datasourceIdWhitelist' : 'datasourceIdWhitelist', + 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist', + 'datasourceIdBlacklist' : 'datasourceIdBlacklist', + 'topicWhitelist' : 'topicWhitelist', + 'outputDir' : 'outputDir' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app', + 'esEventIndexName' : '', + 'esNotificationsIndexName' : '', + 'esIndexHost' : '', + 'maxIndexedEventsForDsAndTopic' : '100', + 'esBatchWriteRetryCount' : '8', + 'esBatchWriteRetryWait' : '60s', + 'esBatchSizeEntries' : '200', + 'esNodesWanOnly' : 'true', + 'brokerApiBaseUrl' : '', + 'brokerDbUrl' : '', + 'brokerDbUser' : '', + 'brokerDbPassword' : '', + 'sparkDriverMemory' : '3G', + 'sparkExecutorMemory' : '7G', + 'sparkExecutorCores' : '6', + 'workingDir' : '/tmp/prod_provision/working_dir/broker_events' + } + + build-report + + + + + + + + wf_20210709_073839_206 + 2021-07-09T11:01:01+00:00 + FAILURE + + + +
\ No newline at end of file diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index cfdf36573..22ee77619 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -24,8 +24,6 @@ dhp-dedup-openaire dhp-enrichment dhp-graph-provision - - dhp-blacklist dhp-stats-update dhp-stats-promote diff --git a/pom.xml b/pom.xml index 433c88093..61b0ad873 100644 --- a/pom.xml +++ b/pom.xml @@ -205,6 +205,7 @@ dateparser 1.0.7 + me.xuender unidecode @@ -519,6 +520,17 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + + io.github.classgraph + classgraph + 4.8.71 + + @@ -741,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.15] + [2.7.18] [4.0.3] [6.0.5] [3.1.6]
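
The root pom change above adds com.opencsv:opencsv 5.5 as a new dependency. The following is a minimal, self-contained sketch of typical opencsv 5.x usage, not code taken from this repository: the file path and column layout are illustrative assumptions.

    import java.io.Reader;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.List;

    import com.opencsv.CSVReader;
    import com.opencsv.CSVReaderBuilder;

    // Sketch only: reads an arbitrary CSV file with opencsv 5.x, skipping the header row.
    public class OpenCsvSketch {
        public static void main(String[] args) throws Exception {
            try (Reader reader = Files.newBufferedReader(Paths.get("/tmp/input.csv")); // hypothetical input file
                 CSVReader csvReader = new CSVReaderBuilder(reader)
                         .withSkipLines(1) // skip the header row
                         .build()) {
                List<String[]> rows = csvReader.readAll(); // each row as an array of field values
                for (String[] row : rows) {
                    System.out.println(String.join(" | ", row));
                }
            }
        }
    }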
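
The root pom also adds io.github.classgraph:classgraph 4.8.71, a classpath scanning library. Below is a minimal sketch of how such a scan is commonly written; the scanned package and the interface are illustrative, and the package-filter method name assumes the pre-4.8.78 API spelling, so treat it as an approximation rather than repository code.

    import io.github.classgraph.ClassGraph;
    import io.github.classgraph.ClassInfo;
    import io.github.classgraph.ScanResult;

    // Sketch only: lists classes on the classpath that implement a given interface.
    public class ClassGraphSketch {
        public static void main(String[] args) {
            try (ScanResult scanResult = new ClassGraph()
                    .enableClassInfo()
                    .whitelistPackages("eu.dnetlib.dhp") // assumed package; later ClassGraph releases rename this to acceptPackages
                    .scan()) {
                for (ClassInfo ci : scanResult.getClassesImplementing("java.io.Serializable")) {
                    System.out.println(ci.getName());
                }
            }
        }
    }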
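
Each executeOozieJob node in the workflow profiles of this patch pairs a map of workflow parameters with a map of Oozie job properties (oozie.wf.application.path, workingDir, Spark executor settings, and so on). As a rough orientation only, the sketch below shows how such a property map could be submitted through the standard Apache Oozie Java client; the Oozie endpoint is hypothetical, the property values merely echo entries from the profiles above, and cluster-specific properties (nameNode, resourceManager, user.name) are omitted.

    import java.util.Properties;

    import org.apache.oozie.client.OozieClient;
    import org.apache.oozie.client.OozieClientException;

    // Sketch only: submits a workflow application with a handful of job properties.
    public class SubmitWorkflowSketch {
        public static void main(String[] args) throws OozieClientException {
            OozieClient client = new OozieClient("http://oozie.host:11000/oozie"); // hypothetical endpoint

            Properties conf = client.createConfiguration();
            conf.setProperty(OozieClient.APP_PATH, "/lib/dnet/PROD/oa/graph/hive/oozie_app"); // mirrors 'oozie.wf.application.path'
            conf.setProperty("sparkDriverMemory", "4G");
            conf.setProperty("sparkExecutorMemory", "10G");
            conf.setProperty("sparkExecutorCores", "3");
            // nameNode, resourceManager and user.name would also have to be set for a real cluster

            String jobId = client.run(conf); // submit and start the workflow
            System.out.println("submitted " + jobId + ", status: " + client.getJobInfo(jobId).getStatus());
        }
    }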