From acd60563309629d6d349f0aa0bdf92a7bf44b8a4 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 12:47:10 +0200 Subject: [PATCH 001/166] added shell action to automatically download the new dump and put it in a specified hdfs location --- .../doiboost/preprocess/oozie_app/download.sh | 2 ++ .../preprocess/oozie_app/workflow.xml | 25 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh new file mode 100644 index 000000000..98984e249 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!bin/bash +curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 03f7b7566..d63e54b8d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,12 +63,14 @@ + ${wf:conf('resumeFrom') eq 'Skip'} + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -76,6 +78,27 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + download.sh + + + + + + ${jobTracker} From c4b18e6ccb2946d9de0923b5d5bab5521907cae6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 16 Jul 2021 15:01:25 +0200 Subject: [PATCH 002/166] changed the download.sh, added skip step to allow to not execute one phase and changed the workflow sequence of steps --- .../dhp/doiboost/preprocess/oozie_app/download.sh | 2 +- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 +++++++------ .../dhp/doiboost/process/oozie_app/workflow.xml | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index 98984e249..dfb0db708 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ #!bin/bash -curl -LSs $1 | hdfs dfs -put - $2$3 \ No newline at end of file +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index d63e54b8d..a1b8804fa 100644 --- 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -70,7 +70,7 @@ ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -78,6 +78,7 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + ${jobTracker} @@ -105,7 +106,7 @@ ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} --outputPath${crossrefDumpPath}/files/ @@ -161,16 +162,16 @@ --targetPath${inputPathCrossref}/crossref_ds - + - - + + - + diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f845d97f3..e75e1d8e1 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -75,6 +75,7 @@ + ${wf:conf('resumeFrom') eq 'Skip'} ${wf:conf('resumeFrom') eq 'PreprocessMag'} ${wf:conf('resumeFrom') eq 'PreprocessUW'} ${wf:conf('resumeFrom') eq 'ProcessORCID'} From 63553a76b3a9e491672c98177be1883a91035fd8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 22 Jul 2021 12:01:48 +0200 Subject: [PATCH 003/166] added code to download gold issn list from unibi --- dhp-common/pom.xml | 10 ++ .../project/utils/CSVParser.java | 7 +- .../actionmanager/project/utils/ReadCSV.java | 16 +- .../project/utils/ReadExcel.java | 2 +- .../dhp/actionmanager/project/parameters.json | 5 + dhp-workflows/dhp-doiboost/pom.xml | 7 +- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 +++++ .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ++++++++++++++++++ .../download_unibi_issn_gold_parameters.json | 39 +++++ .../preprocess/oozie_app/workflow.xml | 14 ++ .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 +++++ .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 +++++++++ .../oa/provision/utils/XmlRecordFactory.java | 2 +- 13 files changed, 398 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json create mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java create mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index 74f31cf35..b32039e32 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -112,6 +112,16 @@ eu.dnetlib.dhp dhp-schemas + + + org.apache.commons + commons-csv + 1.8 + + + + + diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b..1d839bec5 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -16,10 +16,15 @@ import org.apache.commons.lang.reflect.FieldUtils; public class CSVParser { public List parse(String csvFile, String classForName) + throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { + return parse(csvFile, classForName, ';'); + } + + public List parse(String csvFile, String classForName, char delimiter) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { final CSVFormat format = CSVFormat.EXCEL .withHeader() - .withDelimiter(';') + .withDelimiter(delimiter) .withQuote('"') .withTrim(); List ret = new ArrayList<>(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d..f9118350f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -6,6 +6,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -29,6 +30,7 @@ public class ReadCSV implements Closeable { private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; + private final char delimiter; public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -44,19 +46,23 @@ public class ReadCSV implements Closeable { final String hdfsPath = parser.get("hdfsPath"); final String hdfsNameNode = parser.get("hdfsNameNode"); final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { + Optional delimiter = Optional.ofNullable(parser.get("delimiter")); + char del = ';'; + if (delimiter.isPresent()) + del = delimiter.get().charAt(0); + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { log.info("Getting CSV file..."); readCSV.execute(classForName); } + } public void execute(final String classForName) throws Exception { CSVParser csvParser = new CSVParser(); csvParser - .parse(csvFile, classForName) + .parse(csvFile, classForName, delimiter) .stream() .forEach(p -> write(p)); @@ -70,7 +76,8 @@ public class ReadCSV implements Closeable { public ReadCSV( final String hdfsPath, final String hdfsNameNode, - final String fileURL) + final String fileURL, + char delimiter) throws Exception { this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); @@ -85,6 +92,7 @@ public class ReadCSV implements Closeable { this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); + this.delimiter = delimiter; } protected void write(final Object p) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 
5ce0a681c..a13d9b791 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -31,7 +31,7 @@ public class ReadExcel implements Closeable { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - ReadCSV.class + ReadExcel.class .getResourceAsStream( "/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index b6c9c94b9..9ccb70a9f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -28,6 +28,11 @@ "paramLongName" : "sheetName", "paramDescription" : "the name of the sheet in case the file is excel", "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false } diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index f496ea9a2..ea8832754 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,7 +82,12 @@ org.apache.commons commons-text - + + eu.dnetlib.dhp + dhp-aggregation + 1.2.4-SNAPSHOT + compile + diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java new file mode 100644 index 000000000..00b6b184b --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java @@ -0,0 +1,37 @@ + +package eu.dnetlib.doiboost; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + + try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { + + log.info("Getting CSV file..."); + readCSV.execute(classForName); + + } + } + +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java new file mode 100644 index 000000000..e5bd49ada --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java @@ -0,0 +1,151 @@ + +package eu.dnetlib.doiboost; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + private String ISSN; + private String 
ISSN_L; + private String ISSN_IN_DOAJ; + private String ISSN_IN_ROAD; + private String ISSN_IN_PMC; + private String ISSN_IN_OAPC; + private String ISSN_IN_WOS; + private String ISSN_IN_SCOPUS; + private String JOURNAL_IN_DOAJ; + private String JOURNAL_IN_ROAD; + private String JOURNAL_IN_PMC; + private String JOURNAL_IN_OAPC; + private String JOURNAL_IN_WOS; + private String JOURNAL_IN_SCOPUS; + private String TITLE; + private String TITLE_SOURCE; + + public String getISSN() { + return ISSN; + } + + public void setISSN(String ISSN) { + this.ISSN = ISSN; + } + + public String getISSN_L() { + return ISSN_L; + } + + public void setISSN_L(String ISSN_L) { + this.ISSN_L = ISSN_L; + } + + public String getISSN_IN_DOAJ() { + return ISSN_IN_DOAJ; + } + + public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { + this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; + } + + public String getISSN_IN_ROAD() { + return ISSN_IN_ROAD; + } + + public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { + this.ISSN_IN_ROAD = ISSN_IN_ROAD; + } + + public String getISSN_IN_PMC() { + return ISSN_IN_PMC; + } + + public void setISSN_IN_PMC(String ISSN_IN_PMC) { + this.ISSN_IN_PMC = ISSN_IN_PMC; + } + + public String getISSN_IN_OAPC() { + return ISSN_IN_OAPC; + } + + public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { + this.ISSN_IN_OAPC = ISSN_IN_OAPC; + } + + public String getISSN_IN_WOS() { + return ISSN_IN_WOS; + } + + public void setISSN_IN_WOS(String ISSN_IN_WOS) { + this.ISSN_IN_WOS = ISSN_IN_WOS; + } + + public String getISSN_IN_SCOPUS() { + return ISSN_IN_SCOPUS; + } + + public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { + this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; + } + + public String getJOURNAL_IN_DOAJ() { + return JOURNAL_IN_DOAJ; + } + + public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { + this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; + } + + public String getJOURNAL_IN_ROAD() { + return JOURNAL_IN_ROAD; + } + + public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { + this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; + } + + public String getJOURNAL_IN_PMC() { + return JOURNAL_IN_PMC; + } + + public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { + this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; + } + + public String getJOURNAL_IN_OAPC() { + return JOURNAL_IN_OAPC; + } + + public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { + this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; + } + + public String getJOURNAL_IN_WOS() { + return JOURNAL_IN_WOS; + } + + public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { + this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; + } + + public String getJOURNAL_IN_SCOPUS() { + return JOURNAL_IN_SCOPUS; + } + + public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { + this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; + } + + public String getTITLE() { + return TITLE; + } + + public void setTITLE(String TITLE) { + this.TITLE = TITLE; + } + + public String getTITLE_SOURCE() { + return TITLE_SOURCE; + } + + public void setTITLE_SOURCE(String TITLE_SOURCE) { + this.TITLE_SOURCE = TITLE_SOURCE; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json new file mode 100644 index 000000000..9ccb70a9f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json @@ -0,0 +1,39 @@ +[ + + { + "paramName": "fu", + "paramLongName" : "fileURL", + "paramDescription" : "the url of the file to download", + 
"paramRequired" : true + }, + { + "paramName": "hp", + "paramLongName" : "hdfsPath", + "paramDescription" : "where to save the file", + "paramRequired" : true + }, + { + "paramName": "hnn", + "paramLongName" : "hdfsNameNode", + "paramDescription" : "the name node", + "paramRequired" : true + }, + { + "paramName": "cfn", + "paramLongName" : "classForName", + "paramDescription" : "the name of the class to deserialize the csv to", + "paramRequired" : true +}, { + "paramName": "sn", + "paramLongName" : "sheetName", + "paramDescription" : "the name of the sheet in case the file is excel", + "paramRequired" : false +}, { + "paramName": "d", + "paramLongName" : "delimiter", + "paramDescription" : "the delimiter between fields in case it is not ;", + "paramRequired" : false +} + + +] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 52f958d4d..4de1a2185 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,6 +63,7 @@ + ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -76,6 +77,19 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + eu.dnetlib.doiboost.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiGoldIssnFileURL} + --hdfsPath${hdfsPath} + --classForNameeu.dnetlib.doiboost.UnibiGoldModel + + + + + + ${jobTracker} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java new file mode 100644 index 000000000..6cfc90736 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.doiboost; + +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; + +public class GetCSVTest { + + @Test + public void readUnibiGoldTest() throws Exception { + + String programmecsv = IOUtils + .toString( + getClass() + .getClassLoader() + .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); + + CSVParser csvParser = new CSVParser(); + + List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); + + Assertions.assertEquals(72, pl.size()); + +// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); +// +// pl.forEach(res -> { +// try { +// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); +// } catch (JsonProcessingException e) { +// e.printStackTrace(); +// } +// }); + + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv new file mode 100644 index 000000000..6d4b6cdcb --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ 
-0,0 +1,73 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" +"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" +"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" +"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" +"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" +"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva 
Teológica","DOAJ" +"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" +"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" +"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" +"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" +"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" +"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" +"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" +"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" +"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" +"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" +"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" +"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" +"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" +"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" +"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" +"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" +"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. Electrical engineering","ROAD" +"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" +"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" +"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" +"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" +"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" +"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" +"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" +"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" +"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" +"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" +"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" +"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" +"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" +"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..2c8240290 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -16,7 +16,6 @@ import javax.xml.transform.*; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import org.dom4j.Document; @@ -43,6 +42,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { From 
eb07f7f40f76b9a9c0a3f72549908c4432779558 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 27 Jul 2021 12:27:26 +0200 Subject: [PATCH 004/166] Hosted By Map --- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- ...c_cleaning_OPENAIREplus_compliant_hal_orig | 6 +- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +-- ...enaire_datacite_ExchangeLandingpagePid.xsl | 14 +- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- .../crossref_mapping.csv | 4 +- .../dhp/doiboost/issn_gold_oa_version_4.csv | 2 +- dhp-workflows/dhp-graph-mapper/pom.xml | 5 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ++++++ .../dhp/oa/graph/hostebymap/Constants.java | 15 ++ .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ++++++++++++ .../SparkPrepareHostedByMapData.scala | 158 ++++++++++++++++++ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ++++++ .../hostebymap/model/UnibiGoldModel.java | 44 +++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 59 +++++++ .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 111 ++++++++++++ .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- .../dhp/oa/graph/hostedbymap/datasource.json | 10 ++ .../graph/hostedbymap/doaj_transformed.json | 25 +++ .../dhp/oa/graph/hostedbymap/unibiGold.csv | 37 ++++ .../graph/hostedbymap/unibi_transformed.json | 29 ++++ .../dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 +- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 ++--- .../eu/dnetlib/dhp/transform/terms.txt | 2 +- 25 files changed, 764 insertions(+), 53 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle 
datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index 06325810b..b421f1342 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index d4eb71dc4..505f56294 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) 
> 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index 3cfaec80b..f80c91a6a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 6a5fc3f87..0c1fd9e64 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). 
","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -ISSN,Array of String,No,,"Publication/Journal/issn +issn,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv index 6d4b6cdcb..ebde1bf18 100644 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv @@ -1,4 +1,4 @@ -"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" +"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" "0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" "0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" "0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 19febb9ed..665c01276 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,6 +122,11 @@ org.json4s json4s-jackson_2.11 + + com.opencsv + opencsv + 5.5 + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala new file mode 100644 index 000000000..d98418339 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} + + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (!s1.equals("")){ + return s1} + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val 
hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java new file mode 100644 index 000000000..b07e33cd1 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java @@ -0,0 +1,15 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +public class Constants { + + + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java new file mode 100644 index 000000000..d397886a3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("hdfsPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName 
= parser.get("classForName"); + final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + + URLConnection connection = new URL(fileURL).openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if(shouldReplace){ + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + + + writer.close(); + in.close(); + if(shouldReplace){ + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + + } + + + + +} + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala new file mode 100644 index 000000000..55b6e36cd --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala @@ -0,0 +1,158 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} +import org.json4s.jackson.Serialization.write + +import scala.collection.mutable.ListBuffer + +object SparkPrepareHostedByMapData { + + case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {} + + implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel] + implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel] + implicit val 
mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo] + + + def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { + val openaire: HostedByInfo = input._1._1 + val doaj: HostedByInfo = input._1._2 + val gold: HostedByInfo = input._2 + val isOpenAccess: Boolean = doaj == null && gold == null + + openaire.journal_id match { + case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess) + case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess) + case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess) + + // catch the default with a variable so you can print it + case whoa => return null + } + } + + def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { + implicit val formats = DefaultFormats + val serializedJSON:String = write(input) + + var hostedBy = new ListBuffer[String]() + if(!input.issn.equals("")){ + hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" + } + if(!input.eissn.equals("")){ + hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" + } + if(!input.lissn.equals("")){ + hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" + } + + hostedBy + + } + + + def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = { + spark.read.textFile(input).as[Datasource].flatMap(ds => { + val lst = new ListBuffer[HostedByInfo]() + if (ds.getJournal == null) { + return null + } + val issn: String = ds.getJournal.getIssnPrinted + val issnl: String = ds.getJournal.getIssnOnline + val eissn: String = ds.getJournal.getIssnOnline + val id: String = ds.getId + val officialname: String = ds.getOfficialname.getValue + if (issn != null) { + lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL) + } + if (eissn != null) { + lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN) + } + lst + }).filter(i => i != null) + } + + def main(args: Array[String]): Unit = { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + val datasourcePath = parser.get("datasourcePath") + val workingDirPath = parser.get("workingPath") + + + + + logger.info("Getting the Datasources") + + val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel] + val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel] + + val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark) + + val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = doaj.getIssn + val eissn: String = doaj.getEissn + val officialname: String = doaj.getJournalTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN) + } + if (eissn != null) { + lst += 
HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN) + } + lst + }) + + val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => { + val lst = new ListBuffer[HostedByInfo]() + val issn: String = gold.getIssn + val issnl: String = gold.getIssn_l + val officialname: String = gold.getTitle + if (issn != null) { + lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN) + } + if (issnl != null) { + lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL) + } + lst + }) + + Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") + .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) + .filter(i => i != null)) + .flatMap(toHostedByMap) + // .map(i => (i.id,i)) + // .groupByKey(_._1) + // .agg(hostedByAggregator.toColumn) + // .map(p => p._2) + .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") + + + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java new file mode 100644 index 000000000..fe1d14a76 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn ; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..309f74eea --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.oa.graph.hostebymap.model; + +import com.opencsv.bean.CsvBindByName; + +import java.io.Serializable; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + 
public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala new file mode 100644 index 000000000..657f20fce --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -0,0 +1,59 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import java.sql.Timestamp + +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData +import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.codehaus.jackson.map.ObjectMapper +import org.json4s.DefaultFormats +import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} +import org.junit.jupiter.api.Test +import org.slf4j.{Logger, LoggerFactory} + +import scala.io.Source + +class TestPreprocess { + + val logger: Logger = LoggerFactory.getLogger(getClass) + val mapper = new ObjectMapper() + + + + @Test + def readDatasource():Unit = { + + + import org.apache.spark.sql.Encoders + implicit val formats = DefaultFormats + import org.json4s.jackson.Serialization.write + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + + val schema = Encoders.product[HostedByInfo] + + spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r))) + +// SparkPrepareHostedByMapData.readOADataset(path, spark) +// .foreach(r => println(write(r))) + + + spark.close() + } + + + + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java new file mode 100644 index 000000000..01c70502c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; +import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; +import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; +import org.junit.jupiter.api.Test; + +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; + +public class TestReadCSV { + + @Test + public void testCSVUnibi() throws FileNotFoundException { + + + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); + + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); + + ObjectMapper mapper = new ObjectMapper(); + + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + 
e.printStackTrace(); + } + }); + + + } + + @Test + public void testCSVUrlUnibi() throws IOException { + + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> + + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } + + @Test + public void testCSVUrlDOAJ() throws IOException { + + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while((line = in.readLine())!= null){ + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); + + + + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> + + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } + + + ); + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c..4db5fd463 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json new file mode 100644 index 000000000..15e1dcc45 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -0,0 +1,10 @@ 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы 
энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of 
philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. 
Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public 
Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information 
Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and 
Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence 
data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI 
URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, 
Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json new file mode 100644 index 000000000..9cec80eb4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doaj_transformed.json @@ -0,0 +1,25 @@ +{"journalTitle":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","reviewProcess":"Double blind peer review"} +{"journalTitle":"Компьютерные исследования и 
моделирование","issn":"2076-7633","eissn":"2077-6853","reviewProcess":"Blind peer review"} +{"journalTitle":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","reviewProcess":"Double blind peer review"} +{"journalTitle":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","reviewProcess":"Double blind peer review"} +{"journalTitle":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","reviewProcess":"Double blind peer review"} +{"journalTitle":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","reviewProcess":"Peer review"} +{"journalTitle":"Membranes","issn":"2077-0375","eissn":"","reviewProcess":"Blind peer review"} +{"journalTitle":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","reviewProcess":"Blind peer review"} +{"journalTitle":"Agriculture","issn":"","eissn":"2077-0472","reviewProcess":"Blind peer review"} +{"journalTitle":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","reviewProcess":"Double blind peer review"} +{"journalTitle":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","reviewProcess":"Blind peer review"} +{"journalTitle":"Religions","issn":"","eissn":"2077-1444","reviewProcess":"Double blind peer review"} +{"journalTitle":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","reviewProcess":"Double blind peer review"} +{"journalTitle":"UCV-Scientia","issn":"2077-172X","eissn":"","reviewProcess":"Peer review"} +{"journalTitle":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","reviewProcess":"Double blind peer review"} +{"journalTitle":"Granì","issn":"2077-1800","eissn":"2413-8738","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","reviewProcess":"Double blind peer review"} +{"journalTitle":"Science Education International","issn":"","eissn":"2077-2327","reviewProcess":"Double blind peer review"} +{"journalTitle":"Edumecentro","issn":"","eissn":"2077-2874","reviewProcess":"Double blind peer review"} +{"journalTitle":"Monteverdia","issn":"","eissn":"2077-2890","reviewProcess":"Double blind peer review"} +{"journalTitle":"Transformación","issn":"","eissn":"2077-2955","reviewProcess":"Double blind peer review"} +{"journalTitle":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","reviewProcess":"Double blind peer review"} +{"journalTitle":"Revue de Primatologie","issn":"","eissn":"2077-3757","reviewProcess":"Peer review"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv new file mode 100644 index 000000000..eb5d93451 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv @@ -0,0 +1,37 @@ +"ISSN","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" 
+"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" +"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" +"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" +"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" +"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" +"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" +"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" +"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" +"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" +"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" +"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" +"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" +"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" +"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" +"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" +"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" +"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" +"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" +"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" +"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" +"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" +"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" +"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" +"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" +"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" +"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" +"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" +"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" +"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" +"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" +"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" +"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" +"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" +"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" +"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" +"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json new file mode 100644 index 000000000..d4acba4a9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -0,0 +1,29 @@ +{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} +{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu 
informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} +{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} +{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} +{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} +{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} +{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} +{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} +{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} +{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} +{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} +{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} +{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} +{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival studies.","title_source":"ROAD"} +{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} +{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} +{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} +{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} +{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} +{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} +{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} +{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} +{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} +{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} +{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} +{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} +{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} +{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} +{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index 95457fb70..b45fa1edb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml @@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 22da07e29..06ebe97cd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 93cc00eca..30138b5bc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN +datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 52e2315ba270384704f6839c5c95c7cd6b68df95 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:23:00 +0200 Subject: [PATCH 005/166] removed trick for datasourcetypeui --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../dhp/oa/provision/XmlConverterJob.java | 73 +-- .../oa/provision/utils/XmlRecordFactory.java | 447 ++++++++---------- .../provision/input_params_xml_converter.json | 6 - .../dhp/oa/provision/oozie_app/workflow.xml | 5 - .../provision/IndexRecordTransformerTest.java | 36 +- .../oa/provision/XmlRecordFactoryTest.java | 70 ++- pom.xml | 2 +- 8 files changed, 278 insertions(+), 364 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..518f41120 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -42,61 +41,51 @@ public class XmlConverterJob { public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( 
XmlConverterJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json"))); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputPath = parser.get("inputPath"); + final String inputPath = parser.get("inputPath"); log.info("inputPath: {}", inputPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String isLookupUrl = parser.get("isLookupUrl"); + final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); - String otherDsTypeId = parser.get("otherDsTypeId"); - log.info("otherDsTypeId: {}", otherDsTypeId); - - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - convertToXml( - spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + }); } private static void convertToXml( - SparkSession spark, - String inputPath, - String outputPath, - ContextMapper contextMapper, - String otherDsTypeId) { + final SparkSession spark, + final String inputPath, + final String outputPath, + final ContextMapper contextMapper) { final XmlRecordFactory recordFactory = new XmlRecordFactory( prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, - otherDsTypeId); + schemaLocation); final List paths = HdfsSupport .listFiles(inputPath, spark.sparkContext().hadoopConfiguration()); @@ -116,16 +105,15 @@ public class XmlConverterJob { .mapToPair( (PairFunction, Text, Text>) t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) - .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } - private static Map prepareAccumulators(SparkContext sc) { - Map accumulators = Maps.newHashMap(); + private static Map prepareAccumulators(final SparkContext sc) { + final Map accumulators = Maps.newHashMap(); accumulators .put( "resultResult_similarity_isAmongTopNSimilarDocuments", @@ -136,15 +124,13 @@ public class XmlConverterJob { sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments")); accumulators .put( - "resultResult_supplement_isSupplementTo", - sc.longAccumulator("resultResult_supplement_isSupplementTo")); + "resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo")); accumulators .put( "resultResult_supplement_isSupplementedBy", 
sc.longAccumulator("resultResult_supplement_isSupplementedBy")); accumulators - .put( - "resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); + .put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn")); accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges")); accumulators @@ -152,16 +138,11 @@ public class XmlConverterJob { "resultResult_publicationDataset_isRelatedTo", sc.longAccumulator("resultResult_publicationDataset_isRelatedTo")); accumulators - .put( - "resultResult_relationship_isRelatedTo", - sc.longAccumulator("resultResult_relationship_isRelatedTo")); + .put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo")); accumulators - .put( - "resultProject_outcome_isProducedBy", - sc.longAccumulator("resultProject_outcome_isProducedBy")); + .put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy")); accumulators - .put( - "resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); + .put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "resultOrganization_affiliation_isAuthorInstitutionOf", @@ -184,9 +165,7 @@ public class XmlConverterJob { "organizationOrganization_dedup_isMergedIn", sc.longAccumulator("organizationOrganization_dedup_isMergedIn")); accumulators - .put( - "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + .put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index a985d2371..392d3cde6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision.utils; -import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes; +import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor; import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -9,14 +10,23 @@ import java.io.IOException; import java.io.Serializable; import java.io.StringReader; import java.io.StringWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; -import javax.xml.transform.*; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; -import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import org.apache.commons.lang3.StringUtils; import org.apache.spark.util.LongAccumulator; import 
org.dom4j.Document; @@ -36,19 +46,38 @@ import com.google.common.collect.Sets; import com.mycila.xmltool.XMLDoc; import com.mycila.xmltool.XMLTag; -import eu.dnetlib.dhp.oa.provision.model.*; +import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; +import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.MainEntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.ExternalReference; +import eu.dnetlib.dhp.schema.oaf.ExtraInfo; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.Journal; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.Software; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { - private final Map accumulators; + /** + * + */ + private static final long serialVersionUID = 2912912999272373172L; - private final Set specialDatasourceTypes; + private final Map accumulators; private final ContextMapper contextMapper; @@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable { public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { - this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI); + this(Maps.newHashMap(), contextMapper, indent, schemaLocation); } public XmlRecordFactory( final Map accumulators, final ContextMapper contextMapper, final boolean indent, - final String schemaLocation, - final String otherDatasourceTypesUForUI) { + final String schemaLocation) { this.accumulators = accumulators; this.contextMapper = contextMapper; this.schemaLocation = schemaLocation; - this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI)); this.indent = indent; } @@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); // final OafEntity entity = toOafEntity(je.getEntity()); - OafEntity entity = je.getEntity(); - TemplateFactory templateFactory = new TemplateFactory(); + final OafEntity entity = je.getEntity(); + final TemplateFactory templateFactory = new TemplateFactory(); try { final EntityType type = EntityType.fromClass(entity.getClass()); @@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable { final String body = templateFactory .buildBody( - mainType, - metadata, - relations, - listChildren(entity, je, templateFactory), - listExtraInfo(entity)); + mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity)); return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent); } catch (final Throwable e) { @@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable { default: throw new 
IllegalArgumentException("invalid type: " + type); } - } catch (IOException e) { + } catch (final IOException e) { throw new IllegalArgumentException(e); } } - private String printXML(String xml, boolean indent) { + private String printXML(final String xml, final boolean indent) { try { final Document doc = new SAXReader().read(new StringReader(xml)); - OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); + final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); - StringWriter sw = new StringWriter(); - XMLWriter writer = new XMLWriter(sw, format); + final StringWriter sw = new StringWriter(); + final XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); } catch (IOException | DocumentException e) { @@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable { } private List metadata( - final EntityType type, final OafEntity entity, final Set contexts) { + final EntityType type, + final OafEntity entity, + final Set contexts) { final List metadata = Lists.newArrayList(); @@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable { .getAuthor() .stream() .filter(Objects::nonNull) - .map( - a -> { - final StringBuilder sb = new StringBuilder(" isNotBlank(sp.getQualifier().getClassid()) - && isNotBlank(sp.getValue())) - .collect( - Collectors - .toMap( - p -> getAuthorPidType(p.getQualifier().getClassid()), - p -> p, - (p1, p2) -> p1)) - .values() - .stream() - .collect( - Collectors - .groupingBy( - p -> p.getValue(), - Collectors - .mapping( - p -> p, - Collectors.minBy(new AuthorPidTypeComparator())))) - .values() - .stream() - .map(op -> op.get()) - .forEach( - sp -> { - String pidType = getAuthorPidType(sp.getQualifier().getClassid()); - String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); - - // ugly hack: some records provide swapped pidtype and pidvalue - if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { - sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); - } else { - if (isNotBlank(pidType)) { - sb - .append( - String - .format( - " %s=\"%s\"", - pidType, - pidValue - .toLowerCase() - .replaceAll("^.*orcid\\.org\\/", ""))); - } - } - }); - } + .map(a -> { + final StringBuilder sb = new StringBuilder("" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); - return sb.toString(); - }) + .append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\""); + } + if (a.getPid() != null) { + a + .getPid() + .stream() + .filter(Objects::nonNull) + .filter( + sp -> isNotBlank(sp.getQualifier().getClassid()) + && isNotBlank(sp.getValue())) + .collect( + Collectors + .toMap( + p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p, + (p1, p2) -> p1)) + .values() + .stream() + .collect( + Collectors + .groupingBy( + p -> p.getValue(), Collectors + .mapping( + p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) + .values() + .stream() + .map(op -> op.get()) + .forEach(sp -> { + final String pidType = getAuthorPidType(sp.getQualifier().getClassid()); + final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue()); + + // ugly hack: some records provide swapped pidtype and pidvalue + if (authorPidTypes.contains(pidValue.toLowerCase().trim())) { + sb.append(String.format(" %s=\"%s\"", pidValue, pidType)); + } else { + if (isNotBlank(pidType)) { + sb + .append( + String + .format( + " 
%s=\"%s\"", pidType, pidValue + .toLowerCase() + .replaceAll("^.*orcid\\.org\\/", ""))); + } + } + }); + } + sb + .append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + ""); + return sb.toString(); + }) .collect(Collectors.toList())); } if (r.getContributor() != null) { @@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", r.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", r.getDateofacceptance().getValue())); } if (r.getDescription() != null) { metadata @@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable { } if (r.getEmbargoenddate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); + .add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue())); } if (r.getSubject() != null) { metadata @@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "lastmetadataupdate", d.getLastmetadataupdate().getValue())); + .asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue())); } if (d.getMetadataversionnumber() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "metadataversionnumber", d.getMetadataversionnumber().getValue())); + .asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue())); } if (d.getSize() != null) { metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue())); } if (d.getStoragedate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); + .add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue())); } if (d.getVersion() != null) { metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue())); @@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); + .asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue())); } if (s.getProgrammingLanguage() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "programmingLanguage", s.getProgrammingLanguage())); + .mapQualifier("programmingLanguage", s.getProgrammingLanguage())); } break; case datasource: final Datasource ds = (Datasource) entity; if (ds.getDatasourcetype() != null) { - mapDatasourceType(metadata, ds.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype())); + } + if (ds.getDatasourcetypeui() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui())); } if (ds.getOpenairecompatibility() != null) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", ds.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", ds.getOpenairecompatibility())); } if (ds.getOfficialname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); + .add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue())); } if (ds.getEnglishname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); + .add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue())); } if 
(ds.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue())); } if (ds.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue())); } if (ds.getContactemail() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); + .add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue())); } if (ds.getNamespaceprefix() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "namespaceprefix", ds.getNamespaceprefix().getValue())); + .asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue())); } if (ds.getLatitude() != null) { metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue())); } if (ds.getLongitude() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); + .add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue())); } if (ds.getDateofvalidation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dateofvalidation", ds.getDateofvalidation().getValue())); + .asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue())); } if (ds.getDescription() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); + .add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue())); } if (ds.getOdnumberofitems() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitems", ds.getOdnumberofitems().getValue())); + .asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue())); } if (ds.getOdnumberofitemsdate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); + .asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue())); } if (ds.getOdpolicies() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); + .add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue())); } if (ds.getOdlanguages() != null) { metadata @@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "releasestartdate", ds.getReleaseenddate().getValue())); + .asXmlElement("releasestartdate", ds.getReleaseenddate().getValue())); } if (ds.getReleaseenddate() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "releaseenddate", ds.getReleaseenddate().getValue())); + .asXmlElement("releaseenddate", ds.getReleaseenddate().getValue())); } if (ds.getMissionstatementurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "missionstatementurl", ds.getMissionstatementurl().getValue())); + .asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue())); } if (ds.getDataprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "dataprovider", ds.getDataprovider().getValue().toString())); + .asXmlElement("dataprovider", ds.getDataprovider().getValue().toString())); } if (ds.getServiceprovider() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "serviceprovider", ds.getServiceprovider().getValue().toString())); + .asXmlElement("serviceprovider", 
ds.getServiceprovider().getValue().toString())); } if (ds.getDatabaseaccesstype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "databaseaccesstype", ds.getDatabaseaccesstype().getValue())); + .asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue())); } if (ds.getDatauploadtype() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadtype", ds.getDatauploadtype().getValue())); + .asXmlElement("datauploadtype", ds.getDatauploadtype().getValue())); } if (ds.getDatabaseaccessrestriction() != null) { metadata @@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "datauploadrestriction", ds.getDatauploadrestriction().getValue())); + .asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue())); } if (ds.getVersioning() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "versioning", ds.getVersioning().getValue().toString())); + .asXmlElement("versioning", ds.getVersioning().getValue().toString())); } if (ds.getCitationguidelineurl() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "citationguidelineurl", ds.getCitationguidelineurl().getValue())); + .asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue())); } if (ds.getQualitymanagementkind() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "qualitymanagementkind", ds.getQualitymanagementkind().getValue())); + .asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue())); } if (ds.getPidsystems() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); + .add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue())); } if (ds.getCertificates() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); + .add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue())); } if (ds.getPolicies() != null) { metadata @@ -757,13 +743,11 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "legalshortname", o.getLegalshortname().getValue())); + .asXmlElement("legalshortname", o.getLegalshortname().getValue())); } if (o.getLegalname() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); + .add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue())); } if (o.getAlternativeNames() != null) { metadata @@ -777,8 +761,7 @@ public class XmlRecordFactory implements Serializable { } if (o.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue())); } if (o.getLogourl() != null) { metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue())); @@ -786,32 +769,27 @@ public class XmlRecordFactory implements Serializable { if (o.getEclegalbody() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue())); } if (o.getEclegalperson() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue())); + .add(XmlSerializationUtils.asXmlElement("eclegalperson", 
o.getEclegalperson().getValue())); } if (o.getEcnonprofit() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue())); } if (o.getEcresearchorganization() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecresearchorganization", o.getEcresearchorganization().getValue())); + .asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue())); } if (o.getEchighereducation() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "echighereducation", o.getEchighereducation().getValue())); + .asXmlElement("echighereducation", o.getEchighereducation().getValue())); } if (o.getEcinternationalorganizationeurinterests() != null) { metadata @@ -830,20 +808,17 @@ public class XmlRecordFactory implements Serializable { } if (o.getEcenterprise() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue())); } if (o.getEcsmevalidated() != null) { metadata .add( XmlSerializationUtils - .asXmlElement( - "ecsmevalidated", o.getEcsmevalidated().getValue())); + .asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue())); } if (o.getEcnutscode() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue())); } if (o.getCountry() != null) { metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry())); @@ -855,8 +830,7 @@ public class XmlRecordFactory implements Serializable { if (p.getWebsiteurl() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); + .add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue())); } if (p.getCode() != null) { metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue())); @@ -869,8 +843,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getStartdate() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); + .add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue())); } if (p.getEnddate() != null) { metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue())); @@ -879,8 +852,7 @@ public class XmlRecordFactory implements Serializable { metadata .add( XmlSerializationUtils - .asXmlElement( - "callidentifier", p.getCallidentifier().getValue())); + .asXmlElement("callidentifier", p.getCallidentifier().getValue())); } if (p.getKeywords() != null) { metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue())); @@ -890,8 +862,7 @@ public class XmlRecordFactory implements Serializable { } if (p.getEcarticle29_3() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); + .add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue())); } if (p.getSubjects() != null) { metadata @@ -923,13 +894,11 @@ public class XmlRecordFactory implements Serializable { } if (p.getTotalcost() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); + .add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString())); } if 
(p.getFundedamount() != null) { metadata - .add( - XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); + .add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString())); } if (p.getFundingtree() != null) { metadata @@ -950,28 +919,18 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String getAuthorPidType(String s) { + private String getAuthorPidType(final String s) { return XmlSerializationUtils .escapeXml(s) .replaceAll("\\W", "") .replaceAll("\\d", ""); } - private static boolean kvNotBlank(KeyValue kv) { + private static boolean kvNotBlank(final KeyValue kv) { return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()); } - private void mapDatasourceType(List metadata, final Qualifier dsType) { - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType)); - - if (specialDatasourceTypes.contains(dsType.getClassid())) { - dsType.setClassid("other"); - dsType.setClassname("other"); - } - metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType)); - } - - private List mapFields(RelatedEntityWrapper link, Set contexts) { + private List mapFields(final RelatedEntityWrapper link, final Set contexts) { final Relation rel = link.getRelation(); final RelatedEntity re = link.getTarget(); final String targetType = link.getTarget().getType(); @@ -987,16 +946,14 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getDateofacceptance())) { metadata - .add( - XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); + .add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance())); } if (isNotBlank(re.getPublisher())) { metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher())); } if (isNotBlank(re.getCodeRepositoryUrl())) { metadata - .add( - XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); + .add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl())); } if (re.getResulttype() != null && re.getResulttype().isBlank()) { metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype())); @@ -1026,14 +983,16 @@ public class XmlRecordFactory implements Serializable { metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname())); } if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) { - mapDatasourceType(metadata, re.getDatasourcetype()); + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype())); + } + if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) { + metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui())); } if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) { metadata .add( XmlSerializationUtils - .mapQualifier( - "openairecompatibility", re.getOpenairecompatibility())); + .mapQualifier("openairecompatibility", re.getOpenairecompatibility())); } break; case organization: @@ -1042,8 +1001,7 @@ public class XmlRecordFactory implements Serializable { } if (isNotBlank(re.getLegalshortname())) { metadata - .add( - XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); + .add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname())); } if (re.getCountry() != null && !re.getCountry().isBlank()) { 
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry())); @@ -1085,8 +1043,10 @@ public class XmlRecordFactory implements Serializable { return metadata; } - private String mapRelation(Set contexts, TemplateFactory templateFactory, EntityType type, - RelatedEntityWrapper link) { + private String mapRelation(final Set contexts, + final TemplateFactory templateFactory, + final EntityType type, + final RelatedEntityWrapper link) { final Relation rel = link.getRelation(); final String targetType = link.getTarget().getType(); final String scheme = ModelSupport.getScheme(type.toString(), targetType); @@ -1096,8 +1056,9 @@ public class XmlRecordFactory implements Serializable { String.format("missing scheme for: <%s - %s>", type, targetType)); } final HashSet fields = Sets.newHashSet(mapFields(link, contexts)); - if (rel.getValidated() == null) + if (rel.getValidated() == null) { rel.setValidated(false); + } return templateFactory .getRel( targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(), @@ -1105,12 +1066,14 @@ public class XmlRecordFactory implements Serializable { } private List listChildren( - final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) { + final OafEntity entity, + final JoinedEntity je, + final TemplateFactory templateFactory) { final EntityType entityType = EntityType.fromClass(je.getEntity().getClass()); final List links = je.getLinks(); - List children = links + final List children = links .stream() .filter(link -> isDuplicate(link)) .map(link -> { @@ -1131,13 +1094,11 @@ public class XmlRecordFactory implements Serializable { if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); + .add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright())); } if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) { fields - .add( - XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); + .add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom())); } if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) { fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby())); @@ -1147,20 +1108,17 @@ public class XmlRecordFactory implements Serializable { fields .add( XmlSerializationUtils - .asXmlElement( - "dateofacceptance", instance.getDateofacceptance().getValue())); + .asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue())); } if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); + .add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype())); } if (isNotBlank(instance.getDistributionlocation())) { fields .add( XmlSerializationUtils - .asXmlElement( - "distributionlocation", instance.getDistributionlocation())); + .asXmlElement("distributionlocation", instance.getDistributionlocation())); } if (instance.getPid() != null) { fields @@ -1185,8 +1143,7 @@ public class XmlRecordFactory implements Serializable { if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) { fields - .add( - XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); + .add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed())); } if 
(instance.getProcessingchargeamount() != null && isNotBlank(instance.getProcessingchargeamount().getValue())) { @@ -1208,8 +1165,7 @@ public class XmlRecordFactory implements Serializable { children .add( templateFactory - .getInstance( - instance.getHostedby().getKey(), fields, instance.getUrl())); + .getInstance(instance.getHostedby().getKey(), fields, instance.getUrl())); } } final List ext = ((Result) entity).getExternalReference(); @@ -1254,11 +1210,11 @@ public class XmlRecordFactory implements Serializable { return children; } - private boolean isDuplicate(RelatedEntityWrapper link) { + private boolean isDuplicate(final RelatedEntityWrapper link) { return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType()); } - private List listExtraInfo(OafEntity entity) { + private List listExtraInfo(final OafEntity entity) { final List extraInfo = entity.getExtraInfo(); return extraInfo != null ? extraInfo @@ -1271,7 +1227,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if ((contextMapper != null) + if (contextMapper != null && !contextMapper.isEmpty() && MainEntityType.result.toString().equals(type)) { @@ -1302,8 +1258,7 @@ public class XmlRecordFactory implements Serializable { if (def.getName().equals("category")) { final String rootId = substringBefore(def.getId(), "::"); document = addContextDef( - document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), - def); + document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def); } if (def.getName().equals("concept")) { @@ -1327,17 +1282,17 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; - } catch (TransformerConfigurationException e) { + } catch (final TransformerConfigurationException e) { throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e); } } private XMLTag addContextDef(final XMLTag tag, final ContextDef def) { tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel()); - if ((def.getType() != null) && !def.getType().isEmpty()) { + if (def.getType() != null && !def.getType().isEmpty()) { tag.addAttribute("type", def.getType()); } return tag; @@ -1374,16 +1329,14 @@ public class XmlRecordFactory implements Serializable { if (level0 != null) { final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); contextMapper - .put( - level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); + .put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); final Node level1 = fundingPath.selectSingleNode("//funding_level_1"); if (level1 == null) { contexts.add(level0Id); } else { final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); contextMapper - .put( - level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); + .put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); final Node level2 = fundingPath.selectSingleNode("//funding_level_2"); if (level2 == null) { contexts.add(level1Id); @@ -1391,8 +1344,7 @@ public class 
XmlRecordFactory implements Serializable { final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); contextMapper .put( - level2Id, - new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); + level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); contexts.add(level2Id); } } @@ -1413,8 +1365,7 @@ public class XmlRecordFactory implements Serializable { funding += getFunderElement(ftree); for (final Object o : Lists - .reverse( - ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { + .reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { final Element e = (Element) o; final String _id = e.valueOf("./id"); funding += "<" diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index 32720514e..eda6154d7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -16,11 +16,5 @@ "paramLongName": "isLookupUrl", "paramDescription": "URL of the isLookUp Service", "paramRequired": true - }, - { - "paramName": "odt", - "paramLongName": "otherDsTypeId", - "paramDescription": "list of datasource types to populate field datasourcetypeui", - "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 9280678c1..9f41805e2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -25,10 +25,6 @@ targetMaxRelations maximum number of relations allowed for a each entity grouping by target - - otherDsTypeId - mapping used to populate datasourceTypeUi field - format metadata format name (DMF|TMF) @@ -582,7 +578,6 @@ --inputPath${workingDir}/join_entities --outputPath${workingDir}/xml --isLookupUrl${isLookupUrl} - --otherDsTypeId${otherDsTypeId} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..cd07cfcb1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import 
eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -49,7 +46,7 @@ public class IndexRecordTransformerTest { @Test public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); testRecordTransformation(record); } @@ -57,14 +54,14 @@ public class IndexRecordTransformerTest { @Test public void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - XmlRecordFactoryTest.otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = load("publication.json", Publication.class); - Project pj = load("project.json", Project.class); - Relation rel = load("relToValidatedProject.json", Relation.class); + final Publication p = load("publication.json", Publication.class); + final Project pj = load("project.json", Project.class); + final Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + final JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -72,24 +69,25 @@ public class IndexRecordTransformerTest { new RelatedEntityWrapper(rel, CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class)))); - String record = xmlRecordFactory.build(je); + final String record = xmlRecordFactory.build(je); assertNotNull(record); testRecordTransformation(record); } - private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + private void testRecordTransformation(final String record) throws IOException, TransformerException { + final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); + final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); - String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); + final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); - Transformer tr = SaxonTransformerFactory.newInstance(transformer); + final Transformer tr = SaxonTransformerFactory.newInstance(transformer); - String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); + final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record); - SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML); + final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID) + .parseDocument(indexRecordXML); final String xmlDoc = ClientUtils.toXML(solrDoc); @@ -97,7 +95,7 @@ public class IndexRecordTransformerTest { System.out.println(xmlDoc); } - private T load(String fileName, Class clazz) throws IOException { + private T load(final String fileName, final Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); } diff --git 
a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 6631cb4da..2b5e08e92 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; import java.io.StringReader; @@ -29,27 +30,25 @@ import eu.dnetlib.dhp.schema.oaf.Relation; public class XmlRecordFactoryTest { - public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; - public static ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test public void testXMLRecordFactory() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); assertNotNull(xml); - Document doc = new SAXReader().read(new StringReader(xml)); + final Document doc = new SAXReader().read(new StringReader(xml)); assertNotNull(doc); @@ -72,30 +71,29 @@ public class XmlRecordFactoryTest { @Test public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { - ContextMapper contextMapper = new ContextMapper(); + final ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, - otherDsTypeId); + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); - Publication p = OBJECT_MAPPER + final Publication p = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class); - Project pj = OBJECT_MAPPER + final Project pj = OBJECT_MAPPER .readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class); - Relation rel = OBJECT_MAPPER - .readValue( - (IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class); - RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); - List links = Lists.newArrayList(); - RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject); + final Relation rel = OBJECT_MAPPER + .readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class); + final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class); + final List links = 
+ final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
links.add(rew);
- JoinedEntity<Publication> je = new JoinedEntity<>(p);
+ final JoinedEntity<Publication> je = new JoinedEntity<>(p);
je.setLinks(links);
- String xml = xmlRecordFactory.build(je);
+ final String xml = xmlRecordFactory.build(je);
assertNotNull(xml);
- Document doc = new SAXReader().read(new StringReader(xml));
+ final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
@@ -104,29 +102,29 @@ public class XmlRecordFactoryTest {
@Test
public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
- ContextMapper contextMapper = new ContextMapper();
+ final ContextMapper contextMapper = new ContextMapper();
- XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
- otherDsTypeId);
+ final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
+ XmlConverterJob.schemaLocation);
- Publication p = OBJECT_MAPPER
+ final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
- Project pj = OBJECT_MAPPER
+ final Project pj = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
- Relation rel = OBJECT_MAPPER
- .readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class);
- RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
- List<RelatedEntityWrapper> links = Lists.newArrayList();
- RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
+ final Relation rel = OBJECT_MAPPER
+ .readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class);
+ final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
+ final List<RelatedEntityWrapper> links = Lists.newArrayList();
+ final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
links.add(rew);
- JoinedEntity<Publication> je = new JoinedEntity<>(p);
+ final JoinedEntity<Publication> je = new JoinedEntity<>(p);
je.setLinks(links);
- String xml = xmlRecordFactory.build(je);
+ final String xml = xmlRecordFactory.build(je);
assertNotNull(xml);
- Document doc = new SAXReader().read(new StringReader(xml));
+ final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
assertEquals("", doc.valueOf("//rel/validated"));
diff --git a/pom.xml b/pom.xml
index 6e4526e41..1a3523f2b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -736,7 +736,7 @@
3.3.3
3.4.2
[2.12,3.0)
- [2.7.14]
+ [2.7.15-SNAPSHOT]
[4.0.3]
[6.0.5]
[3.1.6]

From 0424f47494ffcc792fc4d9c7dc4e79407dc105c5 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Wed, 28 Jul 2021 10:24:13 +0200
Subject: [PATCH 006/166] HostedByMap fixing issues

---
 .../dhp/oa/graph/hostebymap/Aggregators.scala | 2 +-
 .../SparkPrepareHostedByMapData.scala | 240 +++++++++++-------
 .../oa/graph/hostedbymap/TestPreprocess.scala | 85 ++++++-
 .../dhp/oa/graph/hostedbymap/datasource.json | 20 +-
 4 files changed, 234 insertions(+), 113 deletions(-)
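(Illustrative sketch, not part of the patch: the core of this commit is the new per-source helpers oaHostedByDataset / goldHostedByDataset / doajHostedByDataset plus the toList helper, which fans each HostedByItemType out into one (issn -> item) lookup pair per non-empty ISSN variant. The snippet below reproduces toList as it appears in the diff and feeds it a sample record mirroring the "Transplantation" entry added to datasource.json further down; the object name and the sample values are only for illustration.)

object HostedByToListSketch {

  // Same shape as the HostedByItemType case class defined in Aggregators.scala below.
  case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean)

  // Copied from the toList helper introduced in SparkPrepareHostedByMapData.scala:
  // one (issn -> item) pair per non-empty ISSN variant.
  def toList(input: HostedByItemType): List[(String, HostedByItemType)] = {
    var lst: List[(String, HostedByItemType)] = List()
    if (!input.issn.equals("")) lst = (input.issn, input) :: lst
    if (!input.eissn.equals("")) lst = (input.eissn, input) :: lst
    if (!input.lissn.equals("")) lst = (input.lissn, input) :: lst
    lst
  }

  def main(args: Array[String]): Unit = {
    // Sample input mirroring the "Transplantation" record in datasource.json
    // (issnPrinted = 0041-1337, no online/linking ISSN), so exactly one lookup pair comes out.
    val item = HostedByItemType("10|issn___print::9241f8ebd40dd55cbb179028b84ebb12", "Transplantation", "0041-1337", "", "", false)
    println(toList(item)) // List((0041-1337,HostedByItemType(...)))
  }
}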
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala
index d98418339..561d2bbf4 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala
@@ -5,7 +5,7 @@ import org.apache.spark.sql.expressions.Aggregator
case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {}
-
+case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {}
object Aggregators {
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala
index 55b6e36cd..b66e97281 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala
@@ -2,27 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostebymap
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel}
-import eu.dnetlib.dhp.oa.merge.AuthorMerger
-import eu.dnetlib.dhp.schema.common.ModelConstants
-import eu.dnetlib.dhp.schema.oaf.{Datasource, Organization, Publication, Relation}
+import eu.dnetlib.dhp.schema.oaf.{Datasource}
import org.apache.commons.io.IOUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession}
import org.json4s.DefaultFormats
import org.slf4j.{Logger, LoggerFactory}
-import org.json4s.jackson.Serialization.write
-import scala.collection.mutable.ListBuffer
+import com.fasterxml.jackson.databind.ObjectMapper
object SparkPrepareHostedByMapData {
- case class HostedByInfo(id: Option[String], officialname: String, journal_id: String, provenance : String, id_type: String) {}
implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])
- implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource])
- implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel]
- implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel]
- implicit val mapEncoderHBI: Encoder[HostedByInfo] = Encoders.product[HostedByInfo]
+
+
+
+
def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = {
@@ -32,59 +28,144 @@ object SparkPrepareHostedByMapData {
val isOpenAccess: Boolean = doaj == null && gold == null
openaire.journal_id match {
- case Constants.ISSN => return HostedByItemType(openaire.id.get, openaire.officialname, openaire.journal_id, "", "", isOpenAccess)
- case Constants.EISSN => return HostedByItemType(openaire.id.get, openaire.officialname, "", openaire.journal_id, "", isOpenAccess)
- case Constants.ISSNL => return HostedByItemType(openaire.id.get, openaire.officialname, "", "", openaire.journal_id, isOpenAccess)
+ case Constants.ISSN => HostedByItemType(openaire.id, openaire.officialname, openaire.journal_id, "", "", isOpenAccess)
+ case Constants.EISSN => HostedByItemType(openaire.id, openaire.officialname, "", openaire.journal_id, "", isOpenAccess)
+ case Constants.ISSNL => HostedByItemType(openaire.id, openaire.officialname, "", "", openaire.journal_id, isOpenAccess)
// catch the default with a variable so you can print it
- case whoa => return null
+ case whoa => null
}
}
- def toHostedByMap(input: HostedByItemType): ListBuffer[String] = {
- implicit val formats = DefaultFormats
- val serializedJSON:String = write(input)
+// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = {
+// implicit val formats = DefaultFormats
+// val serializedJSON:String = write(input)
+//
+// var hostedBy = new ListBuffer[String]()
+// if(!input.issn.equals("")){
+// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}"
+// }
+// if(!input.eissn.equals("")){
+// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}"
+// }
+// if(!input.lissn.equals("")){
+// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}"
+// }
+//
+// hostedBy
+//
+// }
- var hostedBy = new ListBuffer[String]()
+ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = {
+ if(issn != null){
+ if(eissn != null){
+ if(issnl != null){
+ HostedByItemType(id, officialname, issn, eissn, issnl , oa)
+ }else{
+ HostedByItemType(id, officialname, issn, eissn, "" , oa)
+ }
+ }else{
+ if(issnl != null){
+ HostedByItemType(id, officialname, issn, "", issnl , oa)
+ }else{
+ HostedByItemType(id, officialname, issn, "", "" , oa)
+ }
+ }
+ }else{
+ if(eissn != null){
+ if(issnl != null){
+ HostedByItemType(id, officialname, "", eissn, issnl , oa)
+ }else{
+ HostedByItemType(id, officialname, "", eissn, "" , oa)
+ }
+ }else{
+ if(issnl != null){
+ HostedByItemType(id, officialname, "", "", issnl , oa)
+ }else{
+ HostedByItemType("", "", "", "", "" , oa)
+ }
+ }
+ }
+ }
+
+ def oaToHostedbyItemType(dats: Datasource): HostedByItemType = {
+ if (dats.getJournal != null) {
+
+ return getHostedByItemType(dats.getId, dats.getOfficialname.getValue, dats.getJournal.getIssnPrinted, dats.getJournal.getIssnOnline, dats.getJournal.getIssnLinking, false)
+ }
+ HostedByItemType("","","","","",false)
+ }
+
+ def oaHostedByDataset(spark:SparkSession, datasourcePath : String) : Dataset[HostedByItemType] = {
+
+ import spark.implicits._
+
+
+ val mapper = new ObjectMapper()
+
+ implicit var encoderD = Encoders.kryo[Datasource]
+
+ val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath)
+ .map(r => mapper.readValue(r, classOf[Datasource]))
+
+ dd.map{ddt => oaToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals("")))
+
+ }
+
+
+ def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = {
+ return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true)
+ }
+
+
+ def goldHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = {
+ import spark.implicits._
+
+ implicit val mapEncoderUnibi: Encoder[UnibiGoldModel] = Encoders.kryo[UnibiGoldModel]
+
+ val mapper = new ObjectMapper()
+
+ val dd : Dataset[UnibiGoldModel] = spark.read.textFile(datasourcePath)
+ .map(r => mapper.readValue(r, classOf[UnibiGoldModel]))
+
+ dd.map{ddt => goldToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals("")))
+
+ }
+
+ def doajToHostedbyItemType(doaj: DOAJModel): HostedByItemType = {
+
+ return getHostedByItemType(Constants.DOAJ, doaj.getJournalTitle, doaj.getIssn, doaj.getEissn, "", true)
+ }
+
+ def doajHostedByDataset(spark:SparkSession, datasourcePath:String) : Dataset[HostedByItemType] = {
+ import spark.implicits._
+
+ implicit val mapEncoderDOAJ: Encoder[DOAJModel] = Encoders.kryo[DOAJModel]
+
+ val mapper = new ObjectMapper()
+
+ val dd : Dataset[DOAJModel] = spark.read.textFile(datasourcePath)
+ .map(r => mapper.readValue(r, classOf[DOAJModel]))
+
+ dd.map{ddt => doajToHostedbyItemType(ddt)}.filter(hb => !(hb.id.equals("")))
+
+ }
+
+ def toList(input: HostedByItemType): List[(String, HostedByItemType)] = {
+ var lst : List[(String, HostedByItemType)] = List()
if(!input.issn.equals("")){
- hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}"
+ lst = (input.issn, input) :: lst
}
if(!input.eissn.equals("")){
- hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}"
+ lst = (input.eissn, input) :: lst
}
if(!input.lissn.equals("")){
- hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}"
+ lst = (input.lissn, input) :: lst
}
-
- hostedBy
-
+ lst
}
- def readOADataset(input:String, spark: SparkSession): Dataset[HostedByInfo] = {
- spark.read.textFile(input).as[Datasource].flatMap(ds => {
- val lst = new ListBuffer[HostedByInfo]()
- if (ds.getJournal == null) {
- return null
- }
- val issn: String = ds.getJournal.getIssnPrinted
- val issnl: String = ds.getJournal.getIssnOnline
- val eissn: String = ds.getJournal.getIssnOnline
- val id: String = ds.getId
- val officialname: String = ds.getOfficialname.getValue
- if (issn != null) {
- lst += HostedByInfo(Some(id), officialname, issn, Constants.OPENAIRE, Constants.ISSN)
- }
- if (issnl != null) {
- lst += HostedByInfo(Some(id), officialname, issnl, Constants.OPENAIRE, Constants.ISSNL)
- }
- if (eissn != null) {
- lst += HostedByInfo(Some(id), officialname, eissn, Constants.OPENAIRE, Constants.EISSN)
- }
- lst
- }).filter(i => i != null)
- }
-
def main(args: Array[String]): Unit = {
val logger: Logger = LoggerFactory.getLogger(getClass)
@@ -105,53 +186,34 @@ object SparkPrepareHostedByMapData {
+ implicit val formats = DefaultFormats
+
logger.info("Getting the Datasources")
- val doajDataset: Dataset[DOAJModel] = spark.read.load(workingDirPath + "/doaj").as[DOAJModel]
- val unibiDataset: Dataset[UnibiGoldModel] = spark.read.load(datasourcePath).as[UnibiGoldModel]
+ // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel]
- val oa: Dataset[HostedByInfo] = readOADataset(datasourcePath, spark)
-
- val doaj: Dataset[HostedByInfo] = doajDataset.flatMap(doaj => {
- val lst = new ListBuffer[HostedByInfo]()
- val issn: String = doaj.getIssn
- val eissn: String = doaj.getEissn
- val officialname: String = doaj.getJournalTitle
- if (issn != null) {
- lst += HostedByInfo(null, officialname, issn, Constants.DOAJ, Constants.ISSN)
- }
- if (eissn != null) {
- lst += HostedByInfo(null, officialname, eissn, Constants.DOAJ, Constants.EISSN)
- }
- lst
- })
-
- val gold: Dataset[HostedByInfo] = unibiDataset.flatMap(gold => {
- val lst = new ListBuffer[HostedByInfo]()
- val issn: String = gold.getIssn
- val issnl: String = gold.getIssn_l
- val officialname: String = gold.getTitle
- if (issn != null) {
- lst += HostedByInfo(null, officialname, issn, Constants.UNIBI, Constants.ISSN)
- }
- if (issnl != null) {
- lst += HostedByInfo(null, officialname, issnl, Constants.UNIBI, Constants.ISSNL)
- }
- lst
- })
-
- Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left")
- .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType)
- .filter(i => i != null))
- .flatMap(toHostedByMap)
- // .map(i => (i.id,i))
- // .groupByKey(_._1)
- // .agg(hostedByAggregator.toColumn)
- // .map(p => p._2)
- .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap")
+ val dats : Dataset[HostedByItemType] =
+ oaHostedByDataset(spark, datasourcePath)
+ .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold"))
+ .union(doajHostedByDataset(spark, workingDirPath + "/doaj"))
dats.flatMap(hbi => toList(hbi))
+ .groupByKey(_._1)
+//
+//
+
+//
+
+//
+//
+// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left")
+// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType)
+// .filter(i => i != null))
+// .flatMap(toHostedByMap)
+// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap")
+//
+//
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
index 657f20fce..8e5657bfc 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
@@ -2,22 +2,23 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap
import java.sql.Timestamp
-import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData
-import eu.dnetlib.dhp.oa.graph.hostebymap.SparkPrepareHostedByMapData.HostedByInfo
+import com.fasterxml.jackson.databind.ObjectMapper
+import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData}
+import eu.dnetlib.dhp.schema.oaf.Datasource
import org.apache.spark.SparkConf
-import org.apache.spark.sql.{Dataset, SparkSession}
-import org.codehaus.jackson.map.ObjectMapper
+import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
import org.json4s.DefaultFormats
import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue}
import org.junit.jupiter.api.Test
import org.slf4j.{Logger, LoggerFactory}
+import scala.collection.mutable.ListBuffer
import scala.io.Source
-class TestPreprocess {
+class TestPreprocess extends java.io.Serializable{
- val logger: Logger = LoggerFactory.getLogger(getClass)
- val mapper = new ObjectMapper()
+ implicit val mapEncoderDats: Encoder[Datasource] = Encoders.kryo[Datasource]
+ implicit val schema = Encoders.product[HostedByInfo]
@@ -27,7 +28,11 @@ class TestPreprocess {
import org.apache.spark.sql.Encoders
implicit val formats = DefaultFormats
- import org.json4s.jackson.Serialization.write
+
+ val logger: Logger = LoggerFactory.getLogger(getClass)
+ val mapper = new ObjectMapper()
+
+
val conf = new SparkConf()
conf.setMaster("local[*]")
@@ -41,12 +46,67 @@ class TestPreprocess {
val path = getClass.getResource("datasource.json").getPath
- val schema = Encoders.product[HostedByInfo]
+ println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count)
- spark.read.textFile(path).foreach(r => println(mapper.writeValueAsString(r)))
-// SparkPrepareHostedByMapData.readOADataset(path, spark)
-// .foreach(r => println(write(r)))
+
+ spark.close()
+ }
+
+
+ @Test
+ def readGold():Unit = {
+
+ implicit val formats = DefaultFormats
+
+ val logger: Logger = LoggerFactory.getLogger(getClass)
+ val mapper = new ObjectMapper()
+
+
+
+ val conf = new SparkConf()
+ conf.setMaster("local[*]")
+ conf.set("spark.driver.host", "localhost")
+ val spark: SparkSession =
+ SparkSession
+ .builder()
+ .appName(getClass.getSimpleName)
+ .config(conf)
+ .getOrCreate()
+ val path = getClass.getResource("unibi_transformed.json").getPath
+
+
+ println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count)
+
+
+
+ spark.close()
+ }
+
+ @Test
+ def readDoaj():Unit = {
+
+ implicit val formats = DefaultFormats
+
+ val logger: Logger = LoggerFactory.getLogger(getClass)
+ val mapper = new ObjectMapper()
+
+
+
+ val conf = new SparkConf()
+ conf.setMaster("local[*]")
+ conf.set("spark.driver.host", "localhost")
+ val spark: SparkSession =
+ SparkSession
+ .builder()
+ .appName(getClass.getSimpleName)
+ .config(conf)
+ .getOrCreate()
+ val path = getClass.getResource("doaj_transformed.json").getPath
+
+
+ println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count)
+
spark.close()
@@ -55,5 +115,4 @@
-
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json
index 15e1dcc45..818aaa716 100644
---
энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. 
Central stations"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of 
philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. 
Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public 
Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
-{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
-{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information 
Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
-{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and 
Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} -{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence 
data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI 
URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, 
Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия 
высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of 
philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. 
Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public 
Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"extraInfo":[],"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"1947-9336","issnPrinted":"1947-9328","name":"International Journal of Operations Research and Information 
Systems"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl19479328"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"International Journal of Operations Research and Information Systems"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::1947-9328"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"extraInfo":[],"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0007-1528","name":"Bulletin of the British Mycological Society"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00071528"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bulletin of the British Mycological Society"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0007-1528"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"extraInfo":[],"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2410-3993","issnPrinted":"","name":"Journal of Technology and 
Innovation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl24103993"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal of Technology and Innovation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn__online::2410-3993"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} +{"accessinfopackage":[],"citationguidelineurl":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://www.gbif.org/citation-guidelines"},"collectedfrom":[{"key":"10|openaire____::21f8a223b9925c2f87c404096080b046","value":"Registry of Research Data 
Repository"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"databaseaccesstype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"open"},"dataprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"datarepository::unknown","classname":"Data Repository","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"datauploadrestriction":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"other"},"datauploadtype":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"restricted"},"dateofcollection":"2019-02-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Bavarian Natural History Collections - occurrence 
data"},"extraInfo":[],"id":"10|re3data_____::b105fa2123b1e2bc3dfff303454c6f72","lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"missionstatementurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"r3b105fa2123"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DWB BioCASe Data Publication pipeline and RDF service"},"openairecompatibility":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["re3data_____::r3d100012934"],"pid":[],"pidsystems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"DOI 
URN"},"policies":[],"qualitymanagementkind":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"yes"},"releasestartdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2006-01-01"},"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Plant Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Zoology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Evolution, 
Anthropology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Biology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Natural Sciences"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geology and Palaeontology"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geochemistry, Mineralogy and Crystallography"},{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Geosciences (including Geography)"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":true},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"http://www.snsb.info/dwb_biocase.html"}} \ No newline at end of file From 1fb572a33ae1fa374d3237aeefb977de8de6685e Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 10:52:24 +0200 Subject: [PATCH 007/166] added eosc fields --- .../dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f0a4161ab..f58677101 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -84,13 +84,18 @@ SELECT dc.id AS collectedfromid, dc.officialname AS collectedfromname, d.typology||'@@@dnet:datasource_typologies' AS datasourcetype, + d.typology||'@@@dnet:datasource_typologies_ui' AS datasourcetypeui, 'sysimport:crosswalk:entityregistry@@@dnet:provenance_actions' AS provenanceaction, d.issn AS issnPrinted, d.eissn AS issnOnline, - d.lissn AS issnLinking + d.lissn AS issnLinking, + de.jurisdiction AS jurisdiction, + de.thematic AS thematic, + de.knowledge_graph AS knowledgegraph, + de.content_policies AS contentpolicies FROM dsm_datasources d - +LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) @@ -126,4 +131,8 @@ GROUP BY dc.officialname, d.issn, d.eissn, - d.lissn + d.lissn, + de.jurisdiction, + de.thematic, + de.knowledge_graph, + de.content_policies From c72c960ffb61bcd7db272d387d34fa4d044e25c3 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:03:15 +0200 Subject: [PATCH 008/166] added eosc fields --- .../eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql index f58677101..98092e882 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryDatasources.sql @@ -89,10 +89,10 @@ SELECT d.issn AS issnPrinted, d.eissn AS issnOnline, d.lissn AS issnLinking, - de.jurisdiction AS jurisdiction, + de.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction, de.thematic AS thematic, de.knowledge_graph AS knowledgegraph, - de.content_policies AS contentpolicies + array(select unnest(de.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies FROM dsm_datasources d LEFT OUTER JOIN dsm_datasources_eosc de on (d.id = de.id) From e6f1773d637cc02ca3424d1c76658156cfb78399 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:17:11 +0200 Subject: [PATCH 009/166] mapping of new eosc fields --- .../raw/MigrateDbEntitiesApplication.java | 97 ++++++++++++++----- 1 file changed, 74 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..d78732f9b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -1,8 +1,41 @@ package eu.dnetlib.dhp.oa.graph.raw; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; -import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASET_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.DATASOURCE_ORGANIZATION; +import static 
eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ENTITYREGISTRY_PROVENANCE_ACTION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.HAS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_MERGED_IN; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PARTICIPANT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PRODUCED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_PROVIDED_BY; +import static eu.dnetlib.dhp.schema.common.ModelConstants.IS_RELATED_TO; +import static eu.dnetlib.dhp.schema.common.ModelConstants.MERGES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORG_ORG_RELTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.ORP_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.OUTCOME; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PARTICIPATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PRODUCES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROJECT_ORGANIZATION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVIDES; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVISION; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DATASET; +import static eu.dnetlib.dhp.schema.common.ModelConstants.PUBLICATION_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RELATIONSHIP; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_PROJECT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.RESULT_RESULT; +import static eu.dnetlib.dhp.schema.common.ModelConstants.SOFTWARE_DEFAULT_RESULTTYPE; +import static eu.dnetlib.dhp.schema.common.ModelConstants.USER_CLAIM; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.asString; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.createOpenaireId; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.dataInfo; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.field; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.journal; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listFields; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; +import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import java.io.Closeable; import java.io.IOException; @@ -53,8 +86,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i private static final Logger log = LoggerFactory.getLogger(MigrateDbEntitiesApplication.class); private static final DataInfo DATA_INFO_CLAIM = dataInfo( - false, null, false, false, - qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), "0.9"); + false, null, false, false, qualifier(USER_CLAIM, USER_CLAIM, DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS), + "0.9"); private static final List COLLECTED_FROM_CLAIM = listKeyValues( createOpenaireId(10, "infrastruct_::openaire", true), "OpenAIRE"); @@ -140,8 +173,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs_dedup: // generates organization entities and relations for openorgs dedup log.info("Processing Openorgs..."); smdbe - .execute( - "queryOpenOrgsForOrgsDedup.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForOrgsDedup.sql", 
smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Sim Rels..."); smdbe.execute("queryOpenOrgsSimilarityForOrgsDedup.sql", smdbe::processOrgOrgSimRels); @@ -150,8 +182,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i case openorgs: // generates organization entities and relations for provision log.info("Processing Openorgs For Provision..."); smdbe - .execute( - "queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); + .execute("queryOpenOrgsForProvision.sql", smdbe::processOrganization, verifyNamespacePrefix); log.info("Processing Openorgs Merge Rels..."); smdbe.execute("queryOpenOrgsSimilarityForProvision.sql", smdbe::processOrgOrgMergeRels); @@ -229,6 +260,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setExtraInfo(new ArrayList<>()); // Values not present in the DB ds.setOaiprovenance(null); // Values not present in the DB ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype"))); + ds.setDatasourcetypeui(prepareQualifierSplitting(rs.getString("datasourcetypeui"))); ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility"))); ds.setOfficialname(field(rs.getString("officialname"), info)); ds.setEnglishname(field(rs.getString("englishname"), info)); @@ -270,6 +302,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i ds.setDataInfo(info); ds.setLastupdatetimestamp(lastUpdateTimestamp); + ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction"))); + ds.setThematic(rs.getBoolean("thematic")); + ds.setKnowledgegraph(rs.getBoolean("knowledgegraph")); + ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies"))); + return Arrays.asList(ds); } catch (final Exception e) { throw new RuntimeException(e); @@ -495,8 +532,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i throw new IllegalStateException( String .format( - "invalid claim, sourceId: %s, targetId: %s, semantics: %s", - sourceId, targetId, semantics)); + "invalid claim, sourceId: %s, targetId: %s, semantics: %s", sourceId, targetId, + semantics)); } r1 = setRelationSemantic(r1, RESULT_PROJECT, OUTCOME, PRODUCES); r2 = setRelationSemantic(r2, RESULT_PROJECT, OUTCOME, IS_PRODUCED_BY); @@ -516,8 +553,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i } } - private Relation prepareRelation(String sourceId, String targetId, String validationDate) { - Relation r = new Relation(); + private Relation prepareRelation(final String sourceId, final String targetId, final String validationDate) { + final Relation r = new Relation(); if (StringUtils.isNotBlank(validationDate)) { r.setValidated(true); r.setValidationDate(validationDate); @@ -530,7 +567,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return r; } - private Relation setRelationSemantic(Relation r, String relType, String subRelType, String relClass) { + private Relation setRelationSemantic(final Relation r, final String relType, final String subRelType, + final String relClass) { r.setRelType(relType); r.setSubRelType(subRelType); r.setRelClass(relClass); @@ -603,6 +641,19 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i return res; } + private List prepareListOfQualifiers(final Array array) throws SQLException { + final List res = new ArrayList<>(); + if (array != null) { + for (final 
String s : (String[]) array.getArray()) { + final Qualifier q = prepareQualifierSplitting(s); + if (q != null) { + res.add(q); + } + } + } + return res; + } + public List processOrgOrgMergeRels(final ResultSet rs) { try { final DataInfo info = prepareDataInfo(rs); // TODO @@ -660,16 +711,16 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setLastupdatetimestamp(lastUpdateTimestamp); // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); + // final Relation r2 = new Relation(); + // r2.setRelType(ORG_ORG_RELTYPE); + // r2.setSubRelType(ORG_ORG_SUBRELTYPE); + // r2.setRelClass(relClass); + // r2.setSource(orgId2); + // r2.setTarget(orgId1); + // r2.setCollectedfrom(collectedFrom); + // r2.setDataInfo(info); + // r2.setLastupdatetimestamp(lastUpdateTimestamp); + // return Arrays.asList(r1, r2); return Arrays.asList(r1); } catch (final Exception e) { From 3d2bba3d5d6f346d3fed3fc80511e9bed78fa92c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 28 Jul 2021 11:25:43 +0200 Subject: [PATCH 010/166] removing not needed classes --- .../main/java/eu/dnetlib/doiboost/GetCSV.java | 37 ----- .../eu/dnetlib/doiboost/UnibiGoldModel.java | 151 ------------------ 2 files changed, 188 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java delete mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java deleted file mode 100644 index 00b6b184b..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/GetCSV.java +++ /dev/null @@ -1,37 +0,0 @@ - -package eu.dnetlib.doiboost; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/download_unibi_issn_gold_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, ',')) { - - log.info("Getting CSV file..."); - readCSV.execute(classForName); - - } - } - -} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java deleted file mode 100644 index e5bd49ada..000000000 --- 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/UnibiGoldModel.java +++ /dev/null @@ -1,151 +0,0 @@ - -package eu.dnetlib.doiboost; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - private String ISSN; - private String ISSN_L; - private String ISSN_IN_DOAJ; - private String ISSN_IN_ROAD; - private String ISSN_IN_PMC; - private String ISSN_IN_OAPC; - private String ISSN_IN_WOS; - private String ISSN_IN_SCOPUS; - private String JOURNAL_IN_DOAJ; - private String JOURNAL_IN_ROAD; - private String JOURNAL_IN_PMC; - private String JOURNAL_IN_OAPC; - private String JOURNAL_IN_WOS; - private String JOURNAL_IN_SCOPUS; - private String TITLE; - private String TITLE_SOURCE; - - public String getISSN() { - return ISSN; - } - - public void setISSN(String ISSN) { - this.ISSN = ISSN; - } - - public String getISSN_L() { - return ISSN_L; - } - - public void setISSN_L(String ISSN_L) { - this.ISSN_L = ISSN_L; - } - - public String getISSN_IN_DOAJ() { - return ISSN_IN_DOAJ; - } - - public void setISSN_IN_DOAJ(String ISSN_IN_DOAJ) { - this.ISSN_IN_DOAJ = ISSN_IN_DOAJ; - } - - public String getISSN_IN_ROAD() { - return ISSN_IN_ROAD; - } - - public void setISSN_IN_ROAD(String ISSN_IN_ROAD) { - this.ISSN_IN_ROAD = ISSN_IN_ROAD; - } - - public String getISSN_IN_PMC() { - return ISSN_IN_PMC; - } - - public void setISSN_IN_PMC(String ISSN_IN_PMC) { - this.ISSN_IN_PMC = ISSN_IN_PMC; - } - - public String getISSN_IN_OAPC() { - return ISSN_IN_OAPC; - } - - public void setISSN_IN_OAPC(String ISSN_IN_OAPC) { - this.ISSN_IN_OAPC = ISSN_IN_OAPC; - } - - public String getISSN_IN_WOS() { - return ISSN_IN_WOS; - } - - public void setISSN_IN_WOS(String ISSN_IN_WOS) { - this.ISSN_IN_WOS = ISSN_IN_WOS; - } - - public String getISSN_IN_SCOPUS() { - return ISSN_IN_SCOPUS; - } - - public void setISSN_IN_SCOPUS(String ISSN_IN_SCOPUS) { - this.ISSN_IN_SCOPUS = ISSN_IN_SCOPUS; - } - - public String getJOURNAL_IN_DOAJ() { - return JOURNAL_IN_DOAJ; - } - - public void setJOURNAL_IN_DOAJ(String JOURNAL_IN_DOAJ) { - this.JOURNAL_IN_DOAJ = JOURNAL_IN_DOAJ; - } - - public String getJOURNAL_IN_ROAD() { - return JOURNAL_IN_ROAD; - } - - public void setJOURNAL_IN_ROAD(String JOURNAL_IN_ROAD) { - this.JOURNAL_IN_ROAD = JOURNAL_IN_ROAD; - } - - public String getJOURNAL_IN_PMC() { - return JOURNAL_IN_PMC; - } - - public void setJOURNAL_IN_PMC(String JOURNAL_IN_PMC) { - this.JOURNAL_IN_PMC = JOURNAL_IN_PMC; - } - - public String getJOURNAL_IN_OAPC() { - return JOURNAL_IN_OAPC; - } - - public void setJOURNAL_IN_OAPC(String JOURNAL_IN_OAPC) { - this.JOURNAL_IN_OAPC = JOURNAL_IN_OAPC; - } - - public String getJOURNAL_IN_WOS() { - return JOURNAL_IN_WOS; - } - - public void setJOURNAL_IN_WOS(String JOURNAL_IN_WOS) { - this.JOURNAL_IN_WOS = JOURNAL_IN_WOS; - } - - public String getJOURNAL_IN_SCOPUS() { - return JOURNAL_IN_SCOPUS; - } - - public void setJOURNAL_IN_SCOPUS(String JOURNAL_IN_SCOPUS) { - this.JOURNAL_IN_SCOPUS = JOURNAL_IN_SCOPUS; - } - - public String getTITLE() { - return TITLE; - } - - public void setTITLE(String TITLE) { - this.TITLE = TITLE; - } - - public String getTITLE_SOURCE() { - return TITLE_SOURCE; - } - - public void setTITLE_SOURCE(String TITLE_SOURCE) { - this.TITLE_SOURCE = TITLE_SOURCE; - } -} From 9f1c7b8e177f479289fda28f933de5bab04fb7ce Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:32:34 +0200 Subject: [PATCH 011/166] tests --- .../raw/MigrateDbEntitiesApplicationTest.java | 192 ++++++++++-------- .../raw/datasources_resultset_entry.json | 
27 +++ 2 files changed, 132 insertions(+), 87 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..2078735f3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -28,7 +28,12 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) @@ -46,11 +51,8 @@ public class MigrateDbEntitiesApplicationTest { public void setUp() { lenient() .when(vocs.getTermAsQualifier(anyString(), anyString())) - .thenAnswer( - invocation -> OafMapperUtils - .qualifier( - invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), - invocation.getArgument(0))); + .thenAnswer(invocation -> OafMapperUtils + .qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -78,6 +80,23 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("issnPrinted", fields), ds.getJournal().getIssnPrinted()); assertEquals(getValueAsString("issnOnline", fields), ds.getJournal().getIssnOnline()); assertEquals(getValueAsString("issnLinking", fields), ds.getJournal().getIssnLinking()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetype().getClassid()); + assertEquals("dnet:datasource_typologies", ds.getDatasourcetype().getSchemeid()); + + assertEquals("pubsrepository::journal", ds.getDatasourcetypeui().getClassid()); + assertEquals("dnet:datasource_typologies_ui", ds.getDatasourcetypeui().getSchemeid()); + + assertEquals("National", ds.getJurisdiction().getClassid()); + assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid()); + + assertTrue(ds.getThematic()); + assertTrue(ds.getKnowledgegraph()); + + assertEquals(1, ds.getContentpolicies().size()); + assertEquals("Journal article", ds.getContentpolicies().get(0).getClassid()); + assertEquals("eosc:contentpolicies", ds.getContentpolicies().get(0).getSchemeid()); + } @Test @@ -119,7 +138,7 @@ public class MigrateDbEntitiesApplicationTest { assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemeid()); assertEquals(getValueAsString("country", fields).split("@@@")[1], o.getCountry().getSchemename()); assertEquals(getValueAsString("collectedfromname", fields), o.getCollectedfrom().get(0).getValue()); - List alternativenames = getValueAsList("alternativenames", fields); + final List alternativenames = getValueAsList("alternativenames", fields); assertEquals(2, alternativenames.size()); assertTrue(alternativenames.contains("Pippo")); assertTrue(alternativenames.contains("Foo")); @@ -216,73 +235,72 @@ public class MigrateDbEntitiesApplicationTest { private List 
prepareMocks(final String jsonFile) throws IOException, SQLException { final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile)); final ObjectMapper mapper = new ObjectMapper(); - final List list = mapper.readValue(json, new TypeReference>() { - }); + final List list = mapper.readValue(json, new TypeReference>() {}); for (final TypedField tf : list) { if (tf.getValue() == null) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); - break; - case "date": - Mockito.when(rs.getDate(tf.getField())).thenReturn(null); - break; - case "int": - Mockito.when(rs.getInt(tf.getField())).thenReturn(0); - break; - case "double": - Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); - break; - case "array": - Mockito.when(rs.getArray(tf.getField())).thenReturn(null); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(null); - break; + case "not_used": + break; + case "boolean": + Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); + break; + case "date": + Mockito.when(rs.getDate(tf.getField())).thenReturn(null); + break; + case "int": + Mockito.when(rs.getInt(tf.getField())).thenReturn(0); + break; + case "double": + Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); + break; + case "array": + Mockito.when(rs.getArray(tf.getField())).thenReturn(null); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(null); + break; } } else { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito - .when(rs.getBoolean(tf.getField())) - .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); - break; - case "date": - Mockito - .when(rs.getDate(tf.getField())) - .thenReturn(Date.valueOf(tf.getValue().toString())); - break; - case "int": - Mockito - .when(rs.getInt(tf.getField())) - .thenReturn(new Integer(tf.getValue().toString())); - break; - case "double": - Mockito - .when(rs.getDouble(tf.getField())) - .thenReturn(new Double(tf.getValue().toString())); - break; - case "array": - final Array arr = Mockito.mock(Array.class); - final String[] values = ((List) tf.getValue()) - .stream() - .filter(Objects::nonNull) - .map(o -> o.toString()) - .toArray(String[]::new); + case "not_used": + break; + case "boolean": + Mockito + .when(rs.getBoolean(tf.getField())) + .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); + break; + case "date": + Mockito + .when(rs.getDate(tf.getField())) + .thenReturn(Date.valueOf(tf.getValue().toString())); + break; + case "int": + Mockito + .when(rs.getInt(tf.getField())) + .thenReturn(new Integer(tf.getValue().toString())); + break; + case "double": + Mockito + .when(rs.getDouble(tf.getField())) + .thenReturn(new Double(tf.getValue().toString())); + break; + case "array": + final Array arr = Mockito.mock(Array.class); + final String[] values = ((List) tf.getValue()) + .stream() + .filter(Objects::nonNull) + .map(o -> o.toString()) + .toArray(String[]::new); - Mockito.when(arr.getArray()).thenReturn(values); - Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); - break; + Mockito.when(arr.getArray()).thenReturn(values); + Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); + break; } } } @@ -294,27 
+312,27 @@ public class MigrateDbEntitiesApplicationTest { for (final TypedField tf : list) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); - break; - case "date": - Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); - break; - case "int": - Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); - break; - case "double": - Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); - break; - case "array": - Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); - break; - case "string": - default: - Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); - break; + case "not_used": + break; + case "boolean": + Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); + break; + case "date": + Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); + break; + case "int": + Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); + break; + case "double": + Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); + break; + case "array": + Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); + break; + case "string": + default: + Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); + break; } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json index befa722e1..42b140306 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/datasources_resultset_entry.json @@ -222,6 +222,11 @@ "type": "string", "value": "pubsrepository::journal@@@dnet:datasource_typologies" }, + { + "field": "datasourcetypeui", + "type": "string", + "value": "pubsrepository::journal@@@dnet:datasource_typologies_ui" + }, { "field": "provenanceaction", "type": "not_used", @@ -241,5 +246,27 @@ "field": "issnLinking", "type": "string", "value": "2579-5447" + }, + { + "field": "jurisdiction", + "type": "string", + "value": "National@@@eosc:jurisdictions" + }, + { + "field": "thematic", + "type": "boolean", + "value": true + }, + { + "field": "knowledgegraph", + "type": "boolean", + "value": true + }, + { + "field": "contentpolicies", + "type": "array", + "value": [ + "Journal article@@@eosc:contentpolicies" + ] } ] From 3e2a2d6e71c4cb9820c858ed114cea9b9ac25021 Mon Sep 17 00:00:00 2001 From: "michele.artini" Date: Wed, 28 Jul 2021 11:56:55 +0200 Subject: [PATCH 012/166] added new fields in xml --- .../CreateRelatedEntitiesJob_phase1.java | 92 ++++++++++--------- .../oa/provision/utils/XmlRecordFactory.java | 24 +++++ 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..b2abbc156 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -27,14 +27,20 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import 
eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Project; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; import scala.Tuple2; /** - * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type - * E_i map E_i as RelatedEntity T_i to simplify the model and extracting only the necessary information join - * (R.target = T_i.id) save the tuples (R_i, T_i) + * CreateRelatedEntitiesJob: (phase 1): prepare tuples [relation - target entity] (R - T): for each entity type E_i map E_i as RelatedEntity + * T_i to simplify the model and extracting only the necessary information join (R.target = T_i.id) save the tuples (R_i, T_i) */ public class CreateRelatedEntitiesJob_phase1 { @@ -42,68 +48,65 @@ public class CreateRelatedEntitiesJob_phase1 { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { - String jsonConfiguration = IOUtils + final String jsonConfiguration = IOUtils .toString( PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + .getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); - Boolean isSparkSessionManaged = Optional + final Boolean isSparkSessionManaged = Optional .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); log.info("isSparkSessionManaged: {}", isSparkSessionManaged); - String inputRelationsPath = parser.get("inputRelationsPath"); + final String inputRelationsPath = parser.get("inputRelationsPath"); log.info("inputRelationsPath: {}", inputRelationsPath); - String inputEntityPath = parser.get("inputEntityPath"); + final String inputEntityPath = parser.get("inputEntityPath"); log.info("inputEntityPath: {}", inputEntityPath); - String outputPath = parser.get("outputPath"); + final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - String graphTableClassName = parser.get("graphTableClassName"); + final String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); - Class entityClazz = (Class) Class.forName(graphTableClassName); + final Class entityClazz = (Class) Class.forName(graphTableClassName); - SparkConf conf = new SparkConf(); + final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.registerKryoClasses(ProvisionModelSupport.getModelClasses()); - runWithSparkSession( - conf, - isSparkSessionManaged, - spark -> { - removeOutputDir(spark, outputPath); - joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); - }); + runWithSparkSession(conf, isSparkSessionManaged, spark -> { + removeOutputDir(spark, outputPath); + 
joinRelationEntity(spark, inputRelationsPath, inputEntityPath, entityClazz, outputPath); + }); } private static void joinRelationEntity( - SparkSession spark, - String inputRelationsPath, - String inputEntityPath, - Class clazz, - String outputPath) { + final SparkSession spark, + final String inputRelationsPath, + final String inputEntityPath, + final Class clazz, + final String outputPath) { - Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) + final Dataset> relsByTarget = readPathRelation(spark, inputRelationsPath) .map( (MapFunction>) r -> new Tuple2<>(r.getTarget(), r), Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) .cache(); relsByTarget @@ -118,7 +121,9 @@ public class CreateRelatedEntitiesJob_phase1 { } private static Dataset readPathEntity( - SparkSession spark, String inputEntityPath, Class entityClazz) { + final SparkSession spark, + final String inputEntityPath, + final Class entityClazz) { log.info("Reading Graph table from: {}", inputEntityPath); return spark @@ -129,7 +134,7 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.bean(entityClazz)); } - public static RelatedEntity asRelatedEntity(E entity, Class clazz) { + public static RelatedEntity asRelatedEntity(final E entity, final Class clazz) { final RelatedEntity re = new RelatedEntity(); re.setId(entity.getId()); @@ -143,7 +148,7 @@ public class CreateRelatedEntitiesJob_phase1 { case dataset: case otherresearchproduct: case software: - Result result = (Result) entity; + final Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { final StructuredProperty title = result.getTitle().stream().findFirst().get(); @@ -170,16 +175,17 @@ public class CreateRelatedEntitiesJob_phase1 { break; case datasource: - Datasource d = (Datasource) entity; + final Datasource d = (Datasource) entity; re.setOfficialname(getValue(d.getOfficialname())); re.setWebsiteurl(getValue(d.getWebsiteurl())); re.setDatasourcetype(d.getDatasourcetype()); + re.setDatasourcetypeui(d.getDatasourcetypeui()); re.setOpenairecompatibility(d.getOpenairecompatibility()); break; case organization: - Organization o = (Organization) entity; + final Organization o = (Organization) entity; re.setLegalname(getValue(o.getLegalname())); re.setLegalshortname(getValue(o.getLegalshortname())); @@ -187,14 +193,14 @@ public class CreateRelatedEntitiesJob_phase1 { re.setWebsiteurl(getValue(o.getWebsiteurl())); break; case project: - Project p = (Project) entity; + final Project p = (Project) entity; re.setProjectTitle(getValue(p.getTitle())); re.setCode(getValue(p.getCode())); re.setAcronym(getValue(p.getAcronym())); re.setContracttype(p.getContracttype()); - List> f = p.getFundingtree(); + final List> f = p.getFundingtree(); if (!f.isEmpty()) { re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); } @@ -203,11 +209,11 @@ public class CreateRelatedEntitiesJob_phase1 { return re; } - private static String getValue(Field field) { + private static String getValue(final Field field) { return getFieldValueWithDefault(field, ""); } - private static T 
getFieldValueWithDefault(Field f, T defaultValue) { + private static T getFieldValueWithDefault(final Field f, final T defaultValue) { return Optional .ofNullable(f) .filter(Objects::nonNull) @@ -216,21 +222,21 @@ public class CreateRelatedEntitiesJob_phase1 { } /** - * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text file, * * @param spark * @param relationPath * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( - SparkSession spark, final String relationPath) { + final SparkSession spark, + final String relationPath) { log.info("Reading relations from: {}", relationPath); return spark.read().load(relationPath).as(Encoders.bean(Relation.class)); } - private static void removeOutputDir(SparkSession spark, String path) { + private static void removeOutputDir(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 392d3cde6..19300d77d 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -735,6 +735,30 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } + if (ds.getJurisdiction() != null) { + metadata.add(XmlSerializationUtils.mapQualifier("jurisdiction", ds.getJurisdiction())); + } + + if (ds.getThematic() != null) { + metadata.add(XmlSerializationUtils.asXmlElement("thematic", ds.getThematic().toString())); + } + + if (ds.getKnowledgegraph() != null) { + metadata + .add(XmlSerializationUtils.asXmlElement("knowledgegraph", ds.getKnowledgegraph().toString())); + } + + if (ds.getContentpolicies() != null) { + metadata + .addAll( + ds + .getContentpolicies() + .stream() + .filter(Objects::nonNull) + .map(q -> XmlSerializationUtils.mapQualifier("contentpolicy", q)) + .collect(Collectors.toList())); + } + break; case organization: final Organization o = (Organization) entity; From baad01cadcc499909d68651ff03a60213b7b8c76 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 29 Jul 2021 13:04:39 +0200 Subject: [PATCH 013/166] hostedbymap --- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 3 +- .../doiboost/DoiBoostMappingUtil.scala | 2 + .../dhp/oa/graph/hostebymap/Aggregators.scala | 54 ------ .../dhp/oa/graph/hostebymap/Constants.java | 15 -- .../dhp/oa/graph/hostebymap/GetCSV.java | 111 ------------ .../oa/graph/hostebymap/model/DOAJModel.java | 53 ------ .../hostebymap/model/UnibiGoldModel.java | 44 ----- .../oa/graph/hostedbymap/Aggregators.scala | 97 ++++++++++ .../dhp/oa/graph/hostedbymap/Constants.java | 13 ++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 107 +++++++++++ .../SparkProduceHostedByMap.scala} | 107 ++++++----- .../oa/graph/hostedbymap/model/DOAJModel.java | 52 ++++++ .../hostedbymap/model/UnibiGoldModel.java | 45 +++++ .../hostedbymap/download_csv_parameters.json | 37 ++++ .../oa/graph/hostedbymap/hostedby_params.json | 38 ++++ .../hostedbymap/oozie_app/config-default.xml | 30 ++++ .../graph/hostedbymap/oozie_app/workflow.xml | 148 
+++++++++++++++ .../oa/graph/hostedbymap/TestPreprocess.scala | 127 +++++++++---- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 168 +++++++++--------- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 28 +-- .../dhp/oa/graph/hostedbymap/datasource.json | 2 +- .../graph/hostedbymap/datasourceHostedByItem | 9 + .../dhp/oa/graph/hostedbymap/doajHostedByItem | 25 +++ .../oa/graph/hostedbymap/unibyHostedByItem | 29 +++ 24 files changed, 885 insertions(+), 459 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java rename dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/{hostebymap/SparkPrepareHostedByMapData.scala => hostedbymap/SparkProduceHostedByMap.scala} (76%) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 3f27b9442..58009bfcf 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -38,8 +38,7 @@ import scala.Tuple2; /** * Groups the graph content by entity identifier to ensure ID uniqueness */ -public class -GroupEntitiesSparkJob { +public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); diff --git 
a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 686a2f1f1..502cb370f 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -245,6 +245,8 @@ object DoiBoostMappingUtil { if (item != null) { hb.setValue(item.officialname) hb.setKey(generateDSId(item.id)) + //TODO replace with the one above as soon as the new HBM will be used + //hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala deleted file mode 100644 index 561d2bbf4..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Aggregators.scala +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap - -import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} -import org.apache.spark.sql.expressions.Aggregator - - -case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} -case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} - -object Aggregators { - - - - def getId(s1:String, s2:String) : String = { - if (!s1.equals("")){ - return s1} - s2 - } - - - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java deleted file mode 100644 index b07e33cd1..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/Constants.java +++ /dev/null @@ -1,15 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -public class Constants { - - - - public static final String OPENAIRE = "openaire"; - public 
static final String DOAJ = "doaj"; - public static final String UNIBI = "unibi"; - - - public static final String ISSN = "issn"; - public static final String EISSN = "eissn"; - public static final String ISSNL = "issnl"; -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java deleted file mode 100644 index d397886a3..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/GetCSV.java +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Optional; - -public class GetCSV { - private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV.class); - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("hdfsPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final Boolean shouldReplace = Optional.ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - - URLConnection connection = new URL(fileURL).openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - - if(shouldReplace){ - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - - Class clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - 
.withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - - - writer.close(); - in.close(); - if(shouldReplace){ - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - - } - - - - -} - diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java deleted file mode 100644 index fe1d14a76..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/DOAJModel.java +++ /dev/null @@ -1,53 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn ; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java deleted file mode 100644 index 309f74eea..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/model/UnibiGoldModel.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap.model; - -import com.opencsv.bean.CsvBindByName; - -import java.io.Serializable; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala new file mode 100644 index 000000000..6a9346ed5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -0,0 +1,97 @@ +package 
eu.dnetlib.dhp.oa.graph.hostedbymap + +import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} +import org.apache.spark.sql.expressions.Aggregator + + +case class HostedByItemType(id: String, officialname: String, issn: String, eissn: String, lissn: String, openAccess: Boolean) {} +case class HostedByInfo(id: String, officialname: String, journal_id: String, provenance : String, id_type: String) {} + +object Aggregators { + + + + def getId(s1:String, s2:String) : String = { + if (s1.startsWith("10|")){ + return s1} + s2 + } + + def getValue(s1:String, s2:String) : String = { + if(!s1.equals("")){ + return s1 + } + s2 + } + + + def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator) + .map{ + case (id:String , res:HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + + HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { + val transformedData : Dataset[(String, HostedByItemType)] = df + .groupByKey(_._1)(Encoders.STRING) + .agg(Aggregators.hostedByAggregator1) + .map{ + case (id:String , res:(String, HostedByItemType)) => res + }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + transformedData + } + + val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) + override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { + return merge(b, a) + } + override def merge(b1: (String, HostedByItemType), b2: (String, HostedByItemType)): (String, HostedByItemType) = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(b1._2.id.startsWith("10|")){ + return (b1._1, HostedByItemType(b1._2.id, b1._2.officialname, b1._2.issn, b1._2.eissn, b1._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + return (b2._1, HostedByItemType(b2._2.id, b2._2.officialname, b2._2.issn, b2._2.eissn, b2._2.lissn, b1._2.openAccess || b2._2.openAccess)) + + } + override def finish(reduction: (String,HostedByItemType)): (String, HostedByItemType) = reduction + override def bufferEncoder: Encoder[(String,HostedByItemType)] = 
Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + + override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) + }.toColumn + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java new file mode 100644 index 000000000..b29877a48 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Constants.java @@ -0,0 +1,13 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +public class Constants { + + public static final String OPENAIRE = "openaire"; + public static final String DOAJ = "doaj"; + public static final String UNIBI = "unibi"; + + public static final String ISSN = "issn"; + public static final String EISSN = "eissn"; + public static final String ISSNL = "issnl"; +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java new file mode 100644 index 000000000..9516cf6f7 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java @@ -0,0 +1,107 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class); + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + GetCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + final String hdfsPath = parser.get("workingPath"); + final String hdfsNameNode = parser.get("hdfsNameNode"); + final String classForName = parser.get("classForName"); + final Boolean shouldReplace = Optional + .ofNullable((parser.get("replace"))) + .map(Boolean::valueOf) + .orElse(false); + + URLConnection connection = new URL(fileURL).openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); + + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + + if (shouldReplace) { + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + } + + Configuration conf = 
new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + + Class clazz = Class.forName(classForName); + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(in) + .withType(clazz) + .withMultilineLimit(1) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + writer.close(); + in.close(); + if (shouldReplace) { + File f = new File("/tmp/DOAJ.csv"); + f.delete(); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala similarity index 76% rename from dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala rename to dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index b66e97281..c44f2cbed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostebymap/SparkPrepareHostedByMapData.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -1,17 +1,23 @@ -package eu.dnetlib.dhp.oa.graph.hostebymap +package eu.dnetlib.dhp.oa.graph.hostedbymap import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostebymap.model.{DOAJModel, UnibiGoldModel} -import eu.dnetlib.dhp.schema.oaf.{Datasource} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DOAJModel, UnibiGoldModel} +import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.json4s.DefaultFormats import org.slf4j.{Logger, LoggerFactory} - import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import java.io.PrintWriter -object SparkPrepareHostedByMapData { +import org.apache.hadoop.io.compress.GzipCodec + + +object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) @@ -37,24 +43,32 @@ object SparkPrepareHostedByMapData { } } -// def toHostedByMap(input: HostedByItemType): ListBuffer[String] = { -// implicit val formats = DefaultFormats -// val serializedJSON:String = write(input) -// -// var hostedBy = new ListBuffer[String]() -// if(!input.issn.equals("")){ -// hostedBy += "{\"" + input.issn + "\":" + serializedJSON + "}" -// } -// if(!input.eissn.equals("")){ -// hostedBy += "{\"" + input.eissn + "\":" + serializedJSON + "}" -// } -// if(!input.lissn.equals("")){ -// hostedBy += "{\"" + input.lissn + "\":" + serializedJSON + "}" -// } -// -// hostedBy -// -// } + def toHostedByMap(input: (String, HostedByItemType)): String = { + import org.json4s.jackson.Serialization + + 
implicit val formats = org.json4s.DefaultFormats + + val map: Map [String, HostedByItemType] = Map (input._1 -> input._2 ) + + Serialization.write(map) + + + } + + /** + * + def toHostedByMap(input: Map[String, HostedByItemType]): String = { + import org.json4s.jackson.Serialization + + implicit val formats = org.json4s.DefaultFormats + + + + Serialization.write(input) + + + } + */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ @@ -166,11 +180,31 @@ object SparkPrepareHostedByMapData { } + + def writeToHDFS(input: Array[String], outputPath: String, hdfsNameNode : String):Unit = { + val conf = new Configuration() + + conf.set("fs.defaultFS", hdfsNameNode) + val fs= FileSystem.get(conf) + val output = fs.create(new Path(outputPath)) + val writer = new PrintWriter(output) + try { + input.foreach(hbi => writer.println(hbi)) + } + finally { + writer.close() + + } + + } + + + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: SparkConf = new SparkConf() - val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedby/prepare_hostedby_params.json"))) + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json"))) parser.parseArgument(args) val spark: SparkSession = SparkSession @@ -179,11 +213,10 @@ object SparkPrepareHostedByMapData { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - import spark.implicits._ val datasourcePath = parser.get("datasourcePath") val workingDirPath = parser.get("workingPath") - + val outputPath = parser.get("outputPath") implicit val formats = DefaultFormats @@ -191,29 +224,15 @@ object SparkPrepareHostedByMapData { logger.info("Getting the Datasources") - // val doajDataset: Dataset[DOAJModel] = spark.read.textFile(workingDirPath + "/doaj").as[DOAJModel] - val dats : Dataset[HostedByItemType] = - oaHostedByDataset(spark, datasourcePath) + Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) - dats.flatMap(hbi => toList(hbi)) - .groupByKey(_._1) + .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) + .map(hbi => toHostedByMap(hbi))(Encoders.STRING) + .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) -// -// - -// - -// -// Aggregators.createHostedByItemTypes(oa.joinWith(doaj, oa.col("journal_id").equalTo(doaj.col("journal_id")), "left") -// .joinWith(gold, $"_1.col('journal_id')".equalTo(gold.col("journal_id")), "left").map(toHostedByItemType) -// .filter(i => i != null)) -// .flatMap(toHostedByMap) -// .write.mode(SaveMode.Overwrite).save(s"$workingDirPath/HostedByMap") -// -// } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java new file mode 100644 index 000000000..ba804b939 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements 
Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java new file mode 100644 index 000000000..0927a136b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json new file mode 100644 index 000000000..fba048343 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"fu", + "paramLongName":"fileURL", + "paramDescription": "the url of the csv file to download", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the hdfs path where to store the downloaded csv, parsed as one json record per line", + "paramRequired": true + }, + { + "paramName": "hnn", + "paramLongName": "hdfsNameNode", + "paramDescription": "the hdfs name node", + "paramRequired": true + }, + { + "paramName": "cfn", + "paramLongName": "classForName", + "paramDescription": "the fully qualified name of the model class used to map the csv rows", + "paramRequired": true + }, + { + "paramName": "sr", + "paramLongName": "replace", + "paramDescription": "true if the input file has to be cleaned before parsing", + "paramRequired": false + } +] + + diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json new file mode 100644 index 000000000..9173b78ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_params.json @@ -0,0 +1,38 @@ + +[ + + { + "paramName":"dsp", + "paramLongName":"datasourcePath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doaj articles", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "the path used to store the HostedByMap", + "paramRequired": true + }, + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "the spark master", + "paramRequired": true + } +] + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml new file mode 100644 index 000000000..e5ec3d0ae --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml new file mode 100644 index 000000000..ecf6c3b31 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -0,0 +1,148 @@ + + + + + sourcePath + the source path + + + outputPath + the output path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + 
+ + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${unibiFileURL} + --workingPath${workingDir}/unibi_gold + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel + + + + + + + + eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + --hdfsNameNode${nameNode} + --fileURL${doajFileURL} + --workingPath${workingDir}/doaj + --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel + --replacetrue + + + + + + + + + + + + yarn-cluster + Produce the new HostedByMap + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkProduceHostedByMap + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --datasourcePath${sourcePath}/datasource + --workingPath${workingDir} + --outputPath${outputPath} + --masteryarn-cluster + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 8e5657bfc..2ed76a72a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,19 +1,14 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import java.sql.Timestamp - -import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostebymap.{Constants, HostedByInfo, SparkPrepareHostedByMapData} +import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} import org.json4s.DefaultFormats import org.junit.jupiter.api.Assertions.{assertNotNull, assertTrue} import org.junit.jupiter.api.Test -import org.slf4j.{Logger, LoggerFactory} - -import scala.collection.mutable.ListBuffer -import scala.io.Source +import org.junit.jupiter.api.Assertions._ +import org.json4s.jackson.Serialization.write class TestPreprocess extends java.io.Serializable{ @@ -21,19 +16,14 @@ class TestPreprocess extends java.io.Serializable{ implicit val schema = Encoders.product[HostedByInfo] + def toHBIString (hbi:HostedByItemType): String = { + implicit val formats = DefaultFormats + + write(hbi) + } @Test def readDatasource():Unit = { - - - import org.apache.spark.sql.Encoders - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -45,25 +35,29 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("datasource.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.oaHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.oaHostedByDataset(spark, path).count) + assertEquals(9, ds.count) + assertEquals(8, ds.filter(hbi => 
!hbi.issn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.eissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.officialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.issn.equals("0212-8365") && hbi.id.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.startsWith("10|"))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readGold():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -76,23 +70,27 @@ class TestPreprocess extends java.io.Serializable{ val path = getClass.getResource("unibi_transformed.json").getPath - println(SparkPrepareHostedByMapData.goldHostedByDataset(spark, path).count) + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.goldHostedByDataset(spark, path) + assertEquals(29, ds.count) + assertEquals(29, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(29, ds.filter(hbi => !hbi.lissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().officialname.equals("European journal of sustainable development.")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).first().lissn.equals("2239-5938")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2239-6101")).count == 1) + ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.UNIBI))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } @Test def readDoaj():Unit = { - - implicit val formats = DefaultFormats - - val logger: Logger = LoggerFactory.getLogger(getClass) - val mapper = new ObjectMapper() - - - val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -104,14 +102,69 @@ class TestPreprocess extends java.io.Serializable{ .getOrCreate() val path = getClass.getResource("doaj_transformed.json").getPath + val ds :Dataset[HostedByItemType]= SparkProduceHostedByMap.doajHostedByDataset(spark, path) - println(SparkPrepareHostedByMapData.doajHostedByDataset(spark, path).count) + assertEquals(25, ds.count) + assertEquals(14, ds.filter(hbi => !hbi.issn.equals("")).count) + assertEquals(21, ds.filter(hbi => !hbi.eissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.lissn.equals("")).count) + assertEquals(0, ds.filter(hbi => hbi.issn.equals("") && hbi.eissn.equals("") && hbi.lissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().officialname.equals("Journal of Space Technology")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).first().eissn.equals("2411-5029")) + assertTrue(ds.filter(hbi => hbi.issn.equals("2077-3099")).count == 1) + assertTrue(ds.filter(hbi => hbi.eissn.equals("2077-2955")).first().issn.equals("")) + 
ds.foreach(hbi => assertTrue(hbi.id.equals(Constants.DOAJ))) + ds.foreach(hbi => println(toHBIString(hbi))) spark.close() } + @Test + def testAggregator() : Unit = { + + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + + + val tmp = SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) + + assertEquals(106, tmp.count) + assertEquals(82, tmp.map(i => i._1)(Encoders.STRING).distinct().count) + + + val ds :Dataset[(String, HostedByItemType)] = Aggregators.explodeHostedByItemType(SparkProduceHostedByMap.oaHostedByDataset(spark, getClass.getResource("datasource.json").getPath) + .union(SparkProduceHostedByMap.goldHostedByDataset(spark,getClass.getResource("unibi_transformed.json").getPath)) + .union(SparkProduceHostedByMap.doajHostedByDataset(spark, getClass.getResource("doaj_transformed.json").getPath)) + .flatMap(hbi => SparkProduceHostedByMap.toList(hbi))(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]))) + + assertEquals(82, ds.count) + + assertEquals(13, ds.filter(i => i._2.id.startsWith("10|")).count) + + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.id.startsWith("10|")) + assertTrue(ds.filter(i => i._1.equals("2077-3757")).first()._2.openAccess) + assertEquals(1, ds.filter(i => i._1.equals("2077-3757")).count) + + val hbmap : Dataset[String] = ds.filter(hbi => hbi._2.id.startsWith("10|")).map(SparkProduceHostedByMap.toHostedByMap)(Encoders.STRING) + + hbmap.foreach(entry => println(entry)) + spark.close() + + } + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 01c70502c..f886b275b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -1,111 +1,109 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.util.List; + +import org.junit.jupiter.api.Test; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.oa.graph.hostebymap.GetCSV; -import eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel; -import org.junit.jupiter.api.Test; -import java.io.*; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.List; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; -public class TestReadCSV { +public class TestReadCSV { - @Test - public void testCSVUnibi() throws FileNotFoundException { + @Test + public void testCSVUnibi() throws FileNotFoundException { + final String sourcePath = 
getClass() + .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") + .getPath(); - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); + List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) + .withType(UnibiGoldModel.class) + .build() + .parse(); - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); + ObjectMapper mapper = new ObjectMapper(); - ObjectMapper mapper = new ObjectMapper(); + beans.forEach(r -> { + try { + System.out.println(mapper.writeValueAsString(r)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + }); - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + } + @Test + public void testCSVUrlUnibi() throws IOException { - } + URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - @Test - public void testCSVUrlUnibi() throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); + ObjectMapper mapper = new ObjectMapper(); - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) + .build() + .parse() + .forEach(line -> - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); + { + try { + System.out.println(mapper.writeValueAsString(line)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> + ); + } - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } + @Test + public void testCSVUrlDOAJ() throws IOException { + URLConnection connection = new URL("https://doaj.org/csv").openConnection(); + connection + .setRequestProperty( + "User-Agent", + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); + connection.connect(); - ); - } + BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); + // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); + PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); + String line = null; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + writer.close(); + in.close(); + in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); + ObjectMapper mapper = new ObjectMapper(); - @Test - public void testCSVUrlDOAJ() throws IOException { + new CsvToBeanBuilder(in) + .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) + .withMultilineLimit(1) + .build() + .parse() + .forEach(lline -> - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); + { + try { + System.out.println(mapper.writeValueAsString(lline)); + } catch (JsonProcessingException e) { + e.printStackTrace(); + } + } - BufferedReader 
in = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - //BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while((line = in.readLine())!= null){ - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostebymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - - ); - } + ); + } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c41a6c68c..63f18a803 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1,13 +1,13 @@ package eu.dnetlib.dhp.oa.graph.raw; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.lenient; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.BeforeEach; @@ -16,12 +16,14 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import java.io.IOException; -import java.util.List; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.lenient; +import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; +import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class MappersTest { @@ -340,7 +342,7 @@ public class MappersTest { assertEquals(2, p.getOriginalId().size()); assertTrue(p.getOriginalId().stream().anyMatch(oid -> oid.equals("oai:pub.uni-bielefeld.de:2949739"))); - //assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); + // assertEquals("oai:pub.uni-bielefeld.de:2949739", p.getOriginalId().get(0)); assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(p.getAuthor().size() > 0); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 818aaa716..4467c702f 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,6 +1,6 @@ {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. 
Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal 
articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public 
Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2640-4613","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
+{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études 
germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"extraInfo":[],"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"0041-1337","name":"Transplantation"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl00411337"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"
},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Transplantation"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0041-1337"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem new file mode 100644 index 000000000..093c57a9c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceHostedByItem @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2640-4613","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} +{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::b9faf9c36c47169d4328e586eb62247c","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn__online::709e633c2ecf46396a4ed1b0096da1d0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem new file mode 100644 index 000000000..effd0dd60 --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/doajHostedByItem @@ -0,0 +1,25 @@ +{"id":"doaj","officialname":"Lëd i Sneg","issn":"2076-6734","eissn":"2412-3765","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Компьютерные исследования и моделирование","issn":"2076-7633","eissn":"2077-6853","lissn":"","openAccess":true} +{"id":"doaj","officialname":" Историко-биологические исследования","issn":"2076-8176","eissn":"2500-1221","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Інформаційні технології і засоби навчання","issn":"2076-8184","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue Internationale de Pédagogie de l’Enseignement Supérieur","issn":"","eissn":"2076-8427","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Проблемы развития территории","issn":"2076-8915","eissn":"2409-9007","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Rambam Maimonides Medical Journal","issn":"","eissn":"2076-9172","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Membranes","issn":"2077-0375","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Clinical Medicine","issn":"","eissn":"2077-0383","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Agriculture","issn":"","eissn":"2077-0472","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Standartnye Obrazcy","issn":"2077-1177","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Металл и литье Украины","issn":"2077-1304","eissn":"2706-5529","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Marine Science and Engineering","issn":"","eissn":"2077-1312","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Religions","issn":"","eissn":"2077-1444","lissn":"","openAccess":true} +{"id":"doaj","officialname":"GW-Unterricht","issn":"2077-1517","eissn":"2414-4169","lissn":"","openAccess":true} +{"id":"doaj","officialname":"UCV-Scientia","issn":"2077-172X","eissn":"","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Sovremennye Issledovaniâ Socialʹnyh Problem","issn":"2077-1770","eissn":"2218-7405","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Granì","issn":"2077-1800","eissn":"2413-8738","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Economics Finance and Administrative Science","issn":"2077-1886","eissn":"2218-0648","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Science Education International","issn":"","eissn":"2077-2327","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Edumecentro","issn":"","eissn":"2077-2874","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Monteverdia","issn":"","eissn":"2077-2890","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Transformación","issn":"","eissn":"2077-2955","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Journal of Space Technology","issn":"2077-3099","eissn":"2411-5029","lissn":"","openAccess":true} +{"id":"doaj","officialname":"Revue de Primatologie","issn":"","eissn":"2077-3757","lissn":"","openAccess":true} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem new file mode 100644 index 000000000..403ffdf5d --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibyHostedByItem @@ -0,0 +1,29 @@ +{"id":"unibi","officialname":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","issn":"2502-731X","eissn":"","lissn":"2502-731X","openAccess":true} +{"id":"unibi","officialname":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","issn":"2502-7409","eissn":"","lissn":"1411-0253","openAccess":true} +{"id":"unibi","officialname":"At-Tadbir : jurnal ilmiah manajemen","issn":"2502-7433","eissn":"","lissn":"2502-7433","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kesehatan Panrita Husada.","issn":"2502-745X","eissn":"","lissn":"2502-745X","openAccess":true} +{"id":"unibi","officialname":"ELang journal (An English Education journal)","issn":"2502-7549","eissn":"","lissn":"2502-7549","openAccess":true} +{"id":"unibi","officialname":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","issn":"2423-3633","eissn":"","lissn":"2423-3625","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ̒ilm/sanjī.","issn":"2423-5563","eissn":"","lissn":"2423-3773","openAccess":true} +{"id":"unibi","officialname":"Iranian journal of animal biosystematics.","issn":"1735-434X","eissn":"","lissn":"1735-434X","openAccess":true} +{"id":"unibi","officialname":"Majallah-i jangal-i Īrān.","issn":"2423-4435","eissn":"","lissn":"2008-6113","openAccess":true} +{"id":"unibi","officialname":"Ābziyān-i zinatī.","issn":"2423-4575","eissn":"","lissn":"2423-4575","openAccess":true} +{"id":"unibi","officialname":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","issn":"2423-4974","eissn":"","lissn":"2423-4974","openAccess":true} +{"id":"unibi","officialname":"AIHM journal club.","issn":"2380-0607","eissn":"","lissn":"2380-0607","openAccess":true} +{"id":"unibi","officialname":"Frontiers.","issn":"1085-4568","eissn":"","lissn":"1085-4568","openAccess":true} +{"id":"unibi","officialname":"˜The œjournal of contemporary archival studies.","issn":"2380-8845","eissn":"","lissn":"2380-8845","openAccess":true} +{"id":"unibi","officialname":"International journal of complementary & alternative medicine.","issn":"2381-1803","eissn":"","lissn":"2381-1803","openAccess":true} +{"id":"unibi","officialname":"Palapala.","issn":"2381-2478","eissn":"","lissn":"2381-2478","openAccess":true} +{"id":"unibi","officialname":"Asia pacific journal of environment ecology and sustainable development.","issn":"2382-5170","eissn":"","lissn":"2382-5170","openAccess":true} +{"id":"unibi","officialname":"Majallah-i salāmat va bihdāsht","issn":"2382-9737","eissn":"","lissn":"2382-9737","openAccess":true} +{"id":"unibi","officialname":"UCT journal of research in science ,engineering and technology","issn":"2382-977X","eissn":"","lissn":"2382-977X","openAccess":true} +{"id":"unibi","officialname":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","issn":"2382-9974","eissn":"","lissn":"2382-9974","openAccess":true} +{"id":"unibi","officialname":"Problemi endokrinnoï patologìï.","issn":"2227-4782","eissn":"","lissn":"2227-4782","openAccess":true} +{"id":"unibi","officialname":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","issn":"2685-0079","eissn":"","lissn":"2597-4971","openAccess":true} +{"id":"unibi","officialname":"Hypermedia magazine.","issn":"2574-0075","eissn":"","lissn":"2574-0075","openAccess":true} +{"id":"unibi","officialname":"˜The œmuseum review.","issn":"2574-0296","eissn":"","lissn":"2574-0296","openAccess":true} +{"id":"unibi","officialname":"Bioactive compounds 
in health and disease.","issn":"2574-0334","eissn":"","lissn":"2574-0334","openAccess":true} +{"id":"unibi","officialname":"Journal of computer science integration.","issn":"2574-108X","eissn":"","lissn":"2574-108X","openAccess":true} +{"id":"unibi","officialname":"Child and adolescent obesity.","issn":"2574-254X","eissn":"","lissn":"2574-254X","openAccess":true} +{"id":"unibi","officialname":"Journal of research on the college president.","issn":"2574-3325","eissn":"","lissn":"2574-3325","openAccess":true} +{"id":"unibi","officialname":"European journal of sustainable development.","issn":"2239-6101","eissn":"","lissn":"2239-5938","openAccess":true} \ No newline at end of file From 613bd3bde0076113dd37a64d2efbc23ec8e9ec61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:54:45 +0200 Subject: [PATCH 014/166] Hosted By Map - refactor of the first attemp to prepare a new hosted by map dependent on the datasource in the graph and on two external sources: the gold list from unibi ad the doaj list of open access journal. Both the lists are downloaded from provided url parameter --- .../graph/hostedbymap/SparkProduceHostedByMap.scala | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index c44f2cbed..ca5e82a4a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -55,20 +55,7 @@ object SparkProduceHostedByMap { } - /** - * - def toHostedByMap(input: Map[String, HostedByItemType]): String = { - import org.json4s.jackson.Serialization - implicit val formats = org.json4s.DefaultFormats - - - - Serialization.write(input) - - - } - */ def getHostedByItemType(id:String, officialname: String, issn:String, eissn:String, issnl:String, oa:Boolean): HostedByItemType = { if(issn != null){ From d8b9b0553b7c89b7831625dd3804384c9dab4e03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:55:39 +0200 Subject: [PATCH 015/166] Hosted By Map - model classes to store the intermediate information to be used to apply the hosted by map --- .../hostedbymap/model/DatasourceInfo.java | 72 +++++++++++++++++++ .../graph/hostedbymap/model/EntityInfo.java | 69 ++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java new file mode 100644 index 000000000..f5ac8f70c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class DatasourceInfo implements Serializable { + private String id; + private String officialname; + private String issn; + private String eissn; + private String lissn; + private Boolean openAccess; + 
+ public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getOfficialname() { + return officialname; + } + + public void setOfficialname(String officialname) { + this.officialname = officialname; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getLissn() { + return lissn; + } + + public void setLissn(String lissn) { + this.lissn = lissn; + } + + public Boolean getOpenAccess() { + return openAccess; + } + + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; + } + + public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ + DatasourceInfo di = new DatasourceInfo(); + di.id = id; + di.officialname = officialname; + di.issn = (issn != null) ? issn : "" ; + di.eissn = (eissn != null) ? eissn:""; + di.lissn = (lissn != null) ? lissn : ""; + di.openAccess = false; + + return di; + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java new file mode 100644 index 000000000..7bf9d0598 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -0,0 +1,69 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap.model; + +import java.io.Serializable; + +public class EntityInfo implements Serializable { + private String id; + private String journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getJournal_id() { + return journal_id; + } + + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Boolean getOpenaccess() { + return openaccess; + } + + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } + + public String getHb_id() { + return hb_id; + } + + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } + + public static EntityInfo newInstance(String id, String j_id, String name){ + return newInstance(id, j_id, name, false); + + } + + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; + + } +} From 7c6ea2f4c700c3ce59dd78e5e4eed4ea8c198c5e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:56:27 +0200 Subject: [PATCH 016/166] Hosted By Map - first attempt for the creation of intermedia information to be used to applu the hosted by map on the graph entities --- .../oa/graph/hostedbymap/Aggregators.scala | 113 +++++++++----- .../SparkPrepareHostedByInfoToApply.scala | 147 ++++++++++++++++++ 2 files changed, 225 insertions(+), 35 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 6a9346ed5..8077efe30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -1,5 +1,6 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import org.apache.spark.sql.{Dataset, Encoder, Encoders, TypedColumn} import org.apache.spark.sql.expressions.Aggregator @@ -25,43 +26,10 @@ object Aggregators { } - def createHostedByItemTypes(df: Dataset[HostedByItemType]): Dataset[HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator) - .map{ - case (id:String , res:HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - - transformedData - } - - val hostedByAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - - HostedByItemType(getId(b1.id, b2.id), getId(b1.officialname, b2.officialname), getId(b1.issn, b2.issn), getId(b1.eissn, b2.eissn), getId(b1.lissn, b2.lissn), b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn - def explodeHostedByItemType(df: Dataset[(String, HostedByItemType)]): Dataset[(String, HostedByItemType)] = { val transformedData : Dataset[(String, HostedByItemType)] = df .groupByKey(_._1)(Encoders.STRING) - .agg(Aggregators.hostedByAggregator1) + .agg(Aggregators.hostedByAggregator) .map{ case (id:String , res:(String, HostedByItemType)) => res }(Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType])) @@ -69,7 +37,7 @@ object Aggregators { transformedData } - val hostedByAggregator1: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { + val hostedByAggregator: TypedColumn[(String, HostedByItemType), (String, HostedByItemType)] = new Aggregator[(String, HostedByItemType), (String, HostedByItemType), (String, HostedByItemType)] { override def zero: (String, HostedByItemType) = ("", HostedByItemType("","","","","",false)) override def reduce(b: (String, HostedByItemType), a:(String,HostedByItemType)): (String, HostedByItemType) = { return merge(b, a) @@ -94,4 +62,79 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn + def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { + val transformedData : Dataset[HostedByItemType] = df + .groupByKey(_.id)(Encoders.STRING) + 
.agg(Aggregators.hostedByToDSAggregator) + .map{ + case (id:String , res: HostedByItemType) => res + }(Encoders.product[HostedByItemType]) + + transformedData + } + + def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { + override def zero: HostedByItemType = HostedByItemType("","","","","",false) + + override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { + return merge(b, a) + } + override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.id.equals("")){ + return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) + + } + return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) + + } + override def finish(reduction: HostedByItemType): HostedByItemType = reduction + override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + + override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] + }.toColumn + + + def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + } + b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2 + + } + override def finish(reduction: EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + def resultToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getId)(Encoders.STRING) + .agg(Aggregators.resultToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } + } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala new file mode 100644 index 000000000..03ab09d8e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -0,0 +1,147 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse +import org.slf4j.{Logger, LoggerFactory} + +object SparkPrepareHostedByInfoToApply { + + + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = 
Encoders.kryo[DatasourceInfo] + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + + def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { + var lst:List[EntityInfo] = List() + + + if (j.getIssnLinking != null && !j.getIssnLinking.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnLinking, name) :: lst + } + if (j.getIssnOnline != null && !j.getIssnOnline.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnOnline, name) :: lst + } + if (j.getIssnPrinted != null && !j.getIssnPrinted.equals("")){ + lst = EntityInfo.newInstance(id, j.getIssnPrinted, name) :: lst + } + lst + } + + def prepareResultInfo(spark:SparkSession, publicationPath:String) : Dataset[EntityInfo] = { + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Publication] = spark.read.textFile(publicationPath) + .map(r => mapper.readValue(r, classOf[Publication])) + + dd.filter(p => p.getJournal != null ).flatMap(p => getList(p.getId, p.getJournal, "")) + + } + + + + def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { + implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + val mapper = new ObjectMapper() + + val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) + .map(r => mapper.readValue(r, classOf[Datasource])) + + dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, + d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) + + } + def toHostedByItem(input:String): HostedByItemType = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] + c.values.head + } + + def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { + var lst : List[EntityInfo] = List() + if (input.getEissn != null && !input.getEissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst + } + + lst + } + + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val hostedByMapPath = parser.get("hostedByMapPath") + + + implicit val formats = DefaultFormats + + + logger.info("Getting the Datasources") + + import spark.implicits._ + + //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} + val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") + + //STEP2: leggere la hostedbymap e raggruppare per datasource id + val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + + //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no + //ed esplodere 
l'info della datasource per ogni journal id diverso da nullo + val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, + datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + dsi.setOpenAccess(t2._2.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + + //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + + //STEP5: join di join con resultInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry + Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) + + + + } + +} From 1695d45bd41a772319d357642df536cd8634faf2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 30 Jul 2021 17:57:01 +0200 Subject: [PATCH 017/166] Hosted By Map - Test class to verify the preparation of the intermediate information --- .../oa/graph/hostedbymap/TestPrepare.scala | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala new file mode 100644 index 000000000..f01e4f59f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestPrepare extends java.io.Serializable{ + + @Test + def testPrepareDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("datasource.json").getPath + + val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) + + val mapper :ObjectMapper = new ObjectMapper() + + assertEquals(9, ds.count) + + assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) + assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) + assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + + assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) + assertTrue(ds.filter(hbi => hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") 
&& hbi.getOfficialname.equals("Thémata")).count == 1) + assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) + ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) + ds.foreach(e => println(mapper.writeValueAsString(e))) + spark.close() + } + + @Test + def testPrepareHostedByMap():Unit = { + + } + + +} From ff1ce75e33e0f7cb2293c50e40b7ff83dd89bedb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:32:59 +0200 Subject: [PATCH 018/166] Hosted By Map - modification in the code to prepare the info needed to apply the HostedByMap. There is no need to join datasources with the hbm: all the information needed is in the hosted by map already --- .../SparkPrepareHostedByInfoToApply.scala | 104 +++++++++--------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 03ab09d8e..7395b2450 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -3,7 +3,8 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} -import eu.dnetlib.dhp.schema.oaf.{Datasource, Journal, Publication} + +import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} @@ -12,11 +13,13 @@ import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods.parse import org.slf4j.{Logger, LoggerFactory} + + object SparkPrepareHostedByInfoToApply { - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.kryo[DatasourceInfo] - implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.kryo[EntityInfo] + implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) + implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { var lst:List[EntityInfo] = List() @@ -47,25 +50,12 @@ object SparkPrepareHostedByInfoToApply { } - - def prepareDatasourceInfo(spark:SparkSession, datasourcePath:String) : Dataset[DatasourceInfo] = { - implicit val mapEncoderDats: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) - - val mapper = new ObjectMapper() - - val dd : Dataset[Datasource] = spark.read.textFile(datasourcePath) - .map(r => mapper.readValue(r, classOf[Datasource])) - - dd.filter(d => d.getJournal != null ).map(d => DatasourceInfo.newInstance(d.getId, d.getOfficialname.getValue, - d.getJournal.getIssnPrinted, d.getJournal.getIssnOnline, d.getJournal.getIssnLinking)) - - } - def toHostedByItem(input:String): HostedByItemType = { + def toEntityInfo(input:String): EntityInfo = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val c :Map[String,HostedByItemType] = json.extract[Map[String, HostedByItemType]] - c.values.head + 
toEntityItem(c.keys.head, c.values.head) } def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { @@ -73,10 +63,49 @@ object SparkPrepareHostedByInfoToApply { if (input.getEissn != null && !input.getEissn.equals("")){ lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst } + if (input.getLissn != null && !input.getLissn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst + } + if (input.getIssn != null && !input.getIssn.equals("")){ + lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst + } lst } + def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { + left.joinWith(right, + left.col("id").equalTo(right.col("id")), "left") + .map(t2 => { + val dsi : DatasourceInfo = t2._1 + if(t2._2 != null){ + val hbi : HostedByItemType = t2._2 + dsi.setOpenAccess(hbi.openAccess) + } + dsi + }).flatMap(explodeJournalInfo) + } + + def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { + + EntityInfo.newInstance(hbi.id, journal_id, hbi.officialname, hbi.openAccess) + + } + + def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + .map(t2 => { + val res: EntityInfo = t2._1 + if(t2._2 != null ){ + val ds = t2._2 + res.setHb_id(ds.getId) + res.setOpenaccess(ds.getOpenaccess) + res.setName(ds.getName) + } + res + })) + } + def main(args: Array[String]): Unit = { @@ -105,43 +134,20 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere le DS e creare le entries {dsid, dsofficialname, issn, eissn, lissn, openaccess} - val datasourceInfoDataset: Dataset[DatasourceInfo] = prepareDatasourceInfo(spark, "$graphPath/datasource") - //STEP2: leggere la hostedbymap e raggruppare per datasource id - val hostedByDataset = Aggregators.hostedByToSingleDSId(spark.createDataset(spark.sparkContext.textFile(hostedByMapPath).map(toHostedByItem))) + //STEP1: leggere la hostedbymap e trasformarla in entity info + val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP3: eseguire una join fra le datasource e la hostedby map (left) per settare se la datasource e' open access o no - //ed esplodere l'info della datasource per ogni journal id diverso da nullo - val join : Dataset[EntityInfo] = datasourceInfoDataset.joinWith(hostedByDataset, - datasourceInfoDataset.col("id").equalTo(hostedByDataset.col("id"), "left")) - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - dsi.setOpenAccess(t2._2.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - - //STEP4: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: creare la mappa publication id issn, eissn, lissn esplosa val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") - //STEP5: join di join con resultInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry - Aggregators.resultToSingleId(resultInfoDataset.joinWith(join, resultInfoDataset.col("journal_id").equalTo(join.col("journal_id")), "left") - .map(t2 => { - val res: EntityInfo = t2._1 - if(t2._2 != null ){ - val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) 
- res.setName(ds.getName) - } - res - })).write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) - + //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left + // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + joinResHBM(resultInfoDataset, hostedByInfo) + .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) } + } From 72df8f92320c7d0ddcecece72a8e3502f5b03022 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:34:44 +0200 Subject: [PATCH 019/166] Hosted By Map - removed the aggregator for the datasource (it is no more needed) and added a new aggregator for the results. Changed also the hostedBYMap aggregator --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +------------------ 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8077efe30..af4ac4507 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -62,42 +62,7 @@ object Aggregators { override def outputEncoder: Encoder[(String,HostedByItemType)] = Encoders.tuple(Encoders.STRING,Encoders.product[HostedByItemType]) }.toColumn - def hostedByToSingleDSId(df: Dataset[ HostedByItemType]): Dataset[ HostedByItemType] = { - val transformedData : Dataset[HostedByItemType] = df - .groupByKey(_.id)(Encoders.STRING) - .agg(Aggregators.hostedByToDSAggregator) - .map{ - case (id:String , res: HostedByItemType) => res - }(Encoders.product[HostedByItemType]) - transformedData - } - - def hostedByToDSAggregator: TypedColumn[HostedByItemType, HostedByItemType] = new Aggregator[HostedByItemType, HostedByItemType, HostedByItemType] { - override def zero: HostedByItemType = HostedByItemType("","","","","",false) - - override def reduce(b: HostedByItemType, a:HostedByItemType): HostedByItemType = { - return merge(b, a) - } - override def merge(b1: HostedByItemType, b2: HostedByItemType): HostedByItemType = { - if (b1 == null){ - return b2 - } - if(b2 == null){ - return b1 - } - if(!b1.id.equals("")){ - return HostedByItemType(b1.id, b1.officialname, b1.issn, b1.eissn, b1.lissn, b1.openAccess || b2.openAccess) - - } - return HostedByItemType(b2.id, b2.officialname, b2.issn, b2.eissn, b2.lissn, b1.openAccess || b2.openAccess) - - } - override def finish(reduction: HostedByItemType): HostedByItemType = reduction - override def bufferEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - - override def outputEncoder: Encoder[HostedByItemType] = Encoders.product[HostedByItemType] - }.toColumn def resultToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ @@ -115,6 +80,7 @@ object Aggregators { } if(!b1.getHb_id.equals("")){ b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + return b1 } b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) b2 From 1e859706a3516b8103342cbe875004a6c112d35b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:23 +0200 Subject: [PATCH 020/166] Hosted By Map - Classes to apply the HBM to results and datasources --- .../SparkApplyHostedByMapToDatasource.scala | 72 +++++++++++++++ 
.../SparkApplyHostedByMapToResult.scala | 88 +++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala new file mode 100644 index 000000000..4ecea63f3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -0,0 +1,72 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult.{applyHBtoPubs, getClass} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +object SparkApplyHostedByMapToDatasource { + + def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { + dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + .map(t2 => { + val d: Datasource = t2._1 + if (t2._2 != null) { + if (d.getOpenairecompatibility.getClassid.equals(ModelConstants.UNKNOWN)) { + d.getOpenairecompatibility.setClassid("hostedBy") + d.getOpenairecompatibility.setClassname("collected from a compatible aggregator") + } + } + d + })(Encoders.bean((classOf[Datasource]))) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + .map(r => mapper.readValue(r, classOf[Datasource])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + + + //c. 
dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario + + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + } + + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala new file mode 100644 index 000000000..37afc319a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils +import eu.dnetlib.dhp.schema.oaf.{Datasource, Instance, OpenAccessRoute, Publication} +import org.apache.commons.io.IOUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.json4s.DefaultFormats +import org.slf4j.{Logger, LoggerFactory} + +import scala.collection.JavaConverters._ + +//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance +// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + + +object SparkApplyHostedByMapToResult { + + def applyHBtoPubs(join: Dataset[EntityInfo], pubs: Dataset[Publication]) = { + pubs.joinWith(join, pubs.col("id").equalTo(join.col("id")), "left") + .map(t2 => { + val p: Publication = t2._1 + if (t2._2 != null) { + val ei: EntityInfo = t2._2 + val i = p.getInstance().asScala + if (i.size == 1) { + val inst: Instance = i(0) + + inst.getHostedby.setKey(ei.getHb_id) + inst.getHostedby.setValue(ei.getName) + if (ei.getOpenaccess) { + inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + } + + } + } + p + })(Encoders.bean(classOf[Publication])) + } + def main(args: Array[String]): Unit = { + + + val logger: Logger = LoggerFactory.getLogger(getClass) + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + + val graphPath = parser.get("graphPath") + + val outputPath = parser.get("outputPath") + val workingPath = parser.get("workingPath") + + + implicit val formats = DefaultFormats + + + implicit val mapEncoderPubs: Encoder[Publication] = Encoders.bean(classOf[Publication]) + implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() + + val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + .map(r => mapper.readValue(r, classOf[Publication])) + + val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + .map(ei => mapper.readValue(ei, classOf[EntityInfo])) + + 
//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance + // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + + + + } + + +} From 90e91486e2cc63a51e51f5e2412ad3ff7c49ba19 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:35:52 +0200 Subject: [PATCH 021/166] Hosted By Map - test class to verify each step in the preparation process --- .../oa/graph/hostedbymap/TestPrepare.scala | 142 +++++++++++++++--- 1 file changed, 124 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index f01e4f59f..efd598fef 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -1,16 +1,31 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DatasourceInfo +import eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply.{joinResHBM, prepareResultInfo, toEntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import javax.management.openmbean.OpenMBeanAttributeInfo import org.apache.spark.SparkConf -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.json4s +import org.json4s.DefaultFormats +import eu.dnetlib.dhp.schema.common.ModelConstants import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.Test class TestPrepare extends java.io.Serializable{ + def getString(input:HostedByItemType):String = { + + import org.json4s.jackson.Serialization.write + implicit val formats = DefaultFormats + + write(input) + } + + @Test - def testPrepareDatasource():Unit = { + def testHostedByMaptoEntityInfo() : Unit = { val conf = new SparkConf() conf.setMaster("local[*]") conf.set("spark.driver.host", "localhost") @@ -20,34 +35,125 @@ class TestPrepare extends java.io.Serializable{ .appName(getClass.getSimpleName) .config(conf) .getOrCreate() - val path = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("hostedbymap.json").getPath - val ds :Dataset[DatasourceInfo]= SparkPrepareHostedByInfoToApply.prepareDatasourceInfo(spark, path) - val mapper :ObjectMapper = new ObjectMapper() + import spark.implicits._ - assertEquals(9, ds.count) + val mapper:ObjectMapper = new ObjectMapper() - assertEquals(8, ds.filter(hbi => !hbi.getIssn.equals("")).count) - assertEquals(5, ds.filter(hbi => !hbi.getEissn.equals("")).count) - assertEquals(0, ds.filter(hbi => !hbi.getLissn.equals("")).count) + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) - assertEquals(0, ds.filter(hbi => hbi.getIssn.equals("") && hbi.getEissn.equals("") && hbi.getLissn.equals("")).count) + val ds :Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hbm)).map(toEntityInfo) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365")).count == 1) - assertTrue(ds.filter(hbi => 
hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getEissn.equals("2253-900X")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getOfficialname.equals("Thémata")).count == 1) - assertTrue(ds.filter(hbi => hbi.getIssn.equals("0212-8365") && hbi.getId.equals("10|doajarticles::abbc9265bea9ff62776a1c39785af00c")).count == 1) - ds.foreach(hbi => assertTrue(hbi.getId.startsWith("10|"))) ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(20, ds.count) spark.close() } @Test - def testPrepareHostedByMap():Unit = { + def testPublicationtoEntityInfo() : Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val path = getClass.getResource("publication.json").getPath + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val ds :Dataset[EntityInfo] = prepareResultInfo(spark, path) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + assertEquals(2, ds.count) + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + + spark.close() + } + + @Test + def testJoinResHBM (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("0001-396X", ei.getJournal_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(!ei.getOpenaccess) + + spark.close() + } + + @Test + def testJoinResHBM2 (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("iteminfofrompublication2").getPath + val hbm = getClass.getResource("iteminfofromhostedbymap2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + val pub_ds :Dataset[EntityInfo] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + 
+ val ds: Dataset[EntityInfo] = joinResHBM(pub_ds, hbm_ds) + + assertEquals(1, ds.count) + + val ei:EntityInfo = ds.first() + + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("Academic Therapy", ei.getName) + assertTrue(ei.getOpenaccess) + + ds.foreach(e => println(mapper.writeValueAsString(e))) + + spark.close() } + } From ee7ccb98dc716313b620dc47d02b1bb277632246 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:36:18 +0200 Subject: [PATCH 022/166] Hosted By Map - test class to verify the application of the hbm to results and datasource --- .../dhp/oa/graph/hostedbymap/TestApply.scala | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala new file mode 100644 index 000000000..b78eb978b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -0,0 +1,134 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.{Datasource, OpenAccessRoute, Publication} +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.Test + +class TestApply extends java.io.Serializable{ + + @Test + def testApplyOnResult (): Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val pub = getClass.getResource("publication.json").getPath + val hbm = getClass.getResource("preparedInfo.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Publication] = Encoders.bean(classOf[Publication]) + + + val pub_ds :Dataset[Publication] = spark.read.textFile(pub).map(p => mapper.readValue(p, classOf[Publication])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(13, pub_ds.count()) + + val ds:Dataset[Publication] = SparkApplyHostedByMapToResult.applyHBtoPubs(hbm_ds, pub_ds) + + assertEquals(13, ds.count) + + val temp: Dataset[(Publication, Publication)] = pub_ds.joinWith(ds, pub_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(13, temp.count()) + temp.foreach(t2 => { + val pb : Publication = t2._1 + val pa : Publication = t2._2 + assertEquals(1, pa.getInstance().size()) + assertEquals(1, pb.getInstance().size()) + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9")){ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735")) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals("Academic Therapy")) + 
assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + + + assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) + assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + + }else{ + assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) + assertTrue(pa.getInstance().get(0).getHostedby.getValue.equals(pb.getInstance().get(0).getHostedby.getValue)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals(pb.getInstance().get(0).getAccessright.getClassid)) + assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals(pb.getInstance().get(0).getAccessright.getClassname)) + assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute == pb.getInstance().get(0).getAccessright.getOpenAccessRoute) + + } + }) + + spark.close() + } + + + @Test + def testApplyOnDatasource():Unit = { + val conf = new SparkConf() + conf.setMaster("local[*]") + conf.set("spark.driver.host", "localhost") + val spark: SparkSession = + SparkSession + .builder() + .appName(getClass.getSimpleName) + .config(conf) + .getOrCreate() + val dats = getClass.getResource("datasource.json").getPath + val hbm = getClass.getResource("preparedInfo2.json").getPath + + val mapper:ObjectMapper = new ObjectMapper() + + implicit val mapEncoderDSInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + implicit val mapEncoderPubInfo: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) + + + val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) + val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + + + assertEquals(10, dats_ds.count()) + + val ds:Dataset[Datasource] = SparkApplyHostedByMapToDatasource.applyHBtoDats(hbm_ds, dats_ds) + + assertEquals(10, ds.count) + + val temp: Dataset[(Datasource, Datasource)] = dats_ds.joinWith(ds, dats_ds.col("id").equalTo(ds.col("id")), "left") + assertEquals(10, temp.count()) + temp.foreach(t2 => { + val pb : Datasource = t2._1 + val pa : Datasource = t2._2 + assertTrue(t2._1.getId.equals(t2._2.getId)) + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) + assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) + + assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) + + + }else{ + assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + + } + }) + + spark.close() + + } + +} From 17292c6641439f175b41e665422dea25a19d44f7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 2 Aug 2021 19:37:08 +0200 Subject: [PATCH 023/166] Hosted By Map - resources for testing purposes --- 
.../dhp/oa/graph/hostedbymap/datasource.json | 4 ++-- .../oa/graph/hostedbymap/datasourceinfo.json | 9 +++++++++ .../graph/hostedbymap/hostedbyitemtype.json | 17 ++++++++++++++++ .../dhp/oa/graph/hostedbymap/hostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap.json | 20 +++++++++++++++++++ .../hostedbymap/iteminfofromhostedbymap2.json | 20 +++++++++++++++++++ .../graph/hostedbymap/iteminfofrompublication | 2 ++ .../hostedbymap/iteminfofrompublication2 | 2 ++ .../oa/graph/hostedbymap/preparedInfo.json | 1 + .../oa/graph/hostedbymap/preparedInfo2.json | 2 ++ .../dhp/oa/graph/hostedbymap/publication.json | 13 ++++++++++++ 11 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json index 4467c702f..5eb41bff5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasource.json @@ -1,5 +1,5 @@ -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы 
энергетики"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} -{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of 
philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. 
Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":null,"dataprovider":{"dataInfo":null,"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-03-01","englishname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"extraInfo":[],"id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d","journal":{"dataInfo":null,"issnPrinted":"1998-9903","name":"Известия высших учебных заведений: Проблемы энергетики"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":null,"value":"0.0"},"longitude":{"dataInfo":null,"value":"0.0"},"namespaceprefix":{"dataInfo":null,"value":"doaj19989903"},"odcontenttypes":[{"dataInfo":null,"value":"Journal articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":null,"value":"0.0"},"officialname":{"dataInfo":null,"value":"Известия высших учебных заведений: Проблемы энергетики"},"openairecompatibility":{"classid":"UNKNOWN","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::1998-9903"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":null,"value":false},"subjects":[{"dataInfo":null,"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Technology: Electrical engineering. Electronics. Nuclear engineering: Production of electric energy or power. Powerplants. 
Central stations"}],"versioning":{"dataInfo":null,"value":false},"websiteurl":{"dataInfo":null,"value":"https://www.energyret.ru/jour/"}} +{"accessinfopackage":[],"collectedfrom":[{"key":"10|driver______::bee53aa31dc2cbb538c10c2b65fa5824","value":"DOAJ-Articles"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2014-12-01","description":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"philosophical research,classical texts of philosophy"},"englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"extraInfo":[],"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2253-900X","issnPrinted":"0212-8365","name":"Thémata"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"doaj02128365"},"odcontenttypes":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Journal 
articles"}],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Thémata"},"openairecompatibility":{"classid":"native","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["doajarticles::0212-8365"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Philosophy. Psychology. Religion: Aesthetics | Philosophy. Psychology. Religion: Logic"}],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"websiteurl":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"https://revistascientificas.us.es/index.php/themata/index"}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public 
Policy"},"extraInfo":[],"id":"10|issn___print::051e86306840dc8255d95c5671e97928","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"","issnPrinted":"2077-3757","name":"Science Technology & Public Policy"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl26404613"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Science Technology & Public Policy"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2640-4613"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} 
{"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"extraInfo":[],"id":"10|issn___print::4b2e7f05b6353940e5a7a592f2a87c94","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2605-8359","issnPrinted":"0751-4239","name":"Cahiers d’études germaniques"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl07514239"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Cahiers d’études germaniques"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible 
aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::0751-4239"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} {"accessinfopackage":[],"collectedfrom":[{"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2","value":"Crossref"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dataprovider":{"dataInfo":{"inferenceprovenance":null,"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"datasourcetype":{"classid":"pubsrepository::journal","classname":"Journal","schemeid":"dnet:datasource_typologies","schemename":"dnet:datasource_typologies"},"dateofcollection":"2020-07-10","englishname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"extraInfo":[],"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","journal":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"issnOnline":"2311-8733","issnPrinted":"2073-1477","name":"Regional Economics Theory and 
Practice"},"lastupdatetimestamp":1626336932282,"latitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"longitude":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"namespaceprefix":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"jrnl20731477"},"odcontenttypes":[],"odlanguages":[],"odnumberofitems":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0.0"},"officialname":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Regional Economics Theory and Practice"},"openairecompatibility":{"classid":"hostedBy","classname":"collected from a compatible aggregator","schemeid":"dnet:datasourceCompatibilityLevel","schemename":"dnet:datasourceCompatibilityLevel"},"originalId":["issn___print::2073-1477"],"pid":[],"policies":[],"serviceprovider":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false},"subjects":[],"versioning":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":false}} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json new file mode 100644 index 000000000..fd51cc2f2 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/datasourceinfo.json @@ -0,0 +1,9 @@ +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Известия высших учебных заведений: Проблемы энергетики","issn":"1998-9903","eissn":"","lissn":"","openAccess":false} +{"id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c","officialname":"Thémata","issn":"0212-8365","eissn":"2253-900X","lissn":"","openAccess":false} +{"id":"10|issn___print::051e86306840dc8255d95c5671e97928","officialname":"Science Technology & Public Policy","issn":"2077-3757","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Cahiers d’études germaniques","issn":"0751-4239","eissn":"2605-8359","lissn":"","openAccess":false} 
+{"id":"10|issn___print::4c950a72660642d69e767d1c2daad4a2","officialname":"Regional Economics Theory and Practice","issn":"2073-1477","eissn":"2311-8733","lissn":"","openAccess":false} +{"id":"10|issn___print::9241f8ebd40dd55cbb179028b84ebb12","officialname":"Transplantation","issn":"0041-1337","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::982b4d2537d3f800b596fbec3dae0c7c","officialname":"International Journal of Operations Research and Information Systems","issn":"1947-9328","eissn":"1947-9336","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Bulletin of the British Mycological Society","issn":"0007-1528","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Technology and Innovation","issn":"","eissn":"2410-3993","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json new file mode 100644 index 000000000..6e4c11f49 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbyitemtype.json @@ -0,0 +1,17 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in 
Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json new file mode 100644 index 000000000..188380bc6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedbymap.json @@ -0,0 +1,20 @@ +{"0001-396X":{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","officialname":"Academic Therapy","issn":"0001-396X","eissn":"","lissn":"","openAccess":false}} +{"0015-7899":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":false}} +{"1434-0860":{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","officialname":"Forschung im Ingenieurwesen","issn":"0015-7899","eissn":"1434-0860","lissn":"","openAccess":true}} +{"0022-0116":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"1573-3564":{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","officialname":"Journal of Contemporary Psychotherapy","issn":"0022-0116","eissn":"1573-3564","lissn":"","openAccess":false}} +{"0022-0493":{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","officialname":"Journal of Economic Entomology","issn":"0022-0493","eissn":"0022-0493","lissn":"","openAccess":false}} +{"0022-4715":{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","officialname":"Journal of Statistical Physics","issn":"0022-4715","eissn":"1572-9613","lissn":"","openAccess":false}} +{"1543-8325":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0023-7639":{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","officialname":"Land Economics","issn":"0023-7639","eissn":"1543-8325","lissn":"","openAccess":false}} +{"0033-3298":{"id":"10|issn___print::91899e3872351895467856daeb798f63","officialname":"Public Administration","issn":"0033-3298","eissn":"1467-9299","lissn":"","openAccess":false}} +{"0033-569X":{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","officialname":"Quarterly of Applied Mathematics","issn":"0033-569X","eissn":"1552-4485","lissn":"","openAccess":false}} +{"0034-6691":{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","officialname":"Review of Polarography","issn":"0034-6691","eissn":"1884-7692","lissn":"","openAccess":false}} +{"0035-1776":{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","officialname":"Revue de 
Synthèse","issn":"0035-1776","eissn":"1955-2343","lissn":"","openAccess":false}} +{"0037-699X":{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","officialname":"Slovenské divadlo","issn":"0037-699X","eissn":"1336-8605","lissn":"","openAccess":true}} +{"0037-9697":{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","officialname":"Proceedings of the Society for Analytical Chemistry","issn":"0037-9697","eissn":"","lissn":"","openAccess":false}} +{"0045-6713":{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","officialname":"Children s Literature in Education","issn":"0045-6713","eissn":"1573-1693","lissn":"","openAccess":false}} +{"0047-4800":{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","officialname":"Littérature","issn":"0047-4800","eissn":"1958-5926","lissn":"","openAccess":false}} +{"0068-1024":{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","officialname":"Brigham Young University science bulletin","issn":"0068-1024","eissn":"","lissn":"","openAccess":false}} +{"0083-6656":{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","officialname":"Vistas in Astronomy","issn":"0083-6656","eissn":"","lissn":"","openAccess":false}} +{"0090-502X":{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","officialname":"Memory & Cognition","issn":"0090-502X","eissn":"1532-5946","lissn":"","openAccess":false}} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json new file mode 100644 index 000000000..dfb95816e --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} 
+{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json new file mode 100644 index 000000000..3d2f2e005 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -0,0 +1,20 @@ +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} 
+{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication new file mode 100644 index 000000000..6ee4f94be --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 new file mode 100644 index 000000000..59bd32d4b --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -0,0 +1,2 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json new file mode 100644 index 000000000..d9c201082 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -0,0 +1 @@ +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json 
b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json new file mode 100644 index 000000000..6e1cce3b6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -0,0 +1,2 @@ +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json new file mode 100644 index 000000000..602bedbda --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -0,0 +1,13 @@ +{"author":[{"fullname":"Shu, L.","name":"L.","pid":[],"rank":1,"surname":"Shu"},{"fullname":"Zhang, Y.","name":"Y.","pid":[],"rank":2,"surname":"Zhang"},{"fullname":"Chen, X.","name":"X.","pid":[],"rank":3,"surname":"Chen"},{"fullname":"Wang, S.","name":"S.","pid":[],"rank":4,"surname":"Wang"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"dateofcollection":"2021-07-10T12:27:03.175Z","dateoftransformation":"2021-07-10T12:38:22.255Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1007/s11036-015-0594-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2015-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT 
Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813784431,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-02-26T11:19:47Z","harvestDate":"2021-07-10T12:27:03.175Z","identifier":"oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/f9b55b76-a06d-4b4e-90d5-ff1dee0f53c4","50|355e65625b88::022a6f1cbe150ebbfe79f31fe220d2e5"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Shu , L , Zhang , Y , Chen , X & Wang , S 2015 , ' Editorial for Special Issue on Industrial Networks and Intelligent Systems ' , Mobile Networks and Applications , vol. 20 , no. 2 , pp. 121-123 . https://doi.org/10.1007/s11036-015-0594-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Editorial for Special Issue on Industrial Networks and Intelligent Systems"}]} +{"author":[{"fullname":"Hemmilä, Kari","name":"Kari","pid":[],"rank":1,"surname":"Hemmilä"},{"fullname":"Saarni, Risto","name":"Risto","pid":[],"rank":2,"surname":"Saarni"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"dateofcollection":"2021-07-10T12:31:21.927Z","dateoftransformation":"2021-07-10T13:17:02.345Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Millainen on onnistunut ikkunaremontti? Mikä on asukkaan kannalta paras vaihtoehto? Mitä seikkoja ikkunaremontissa on erityisesti otettava huomioon? Kirjan tekijät diplomi-insinööri Kari Hemmilä ja tekniikan lisensiaatti Risto Saarni ovat olleet mukana monissa ikkunoita ja niiden remontointia koskevissa tutkimusprojekteissa. Näiden lisäksi kirja perustuu lukuisista ikkunaremonteista saatuihin kokemuksiin, remonttien lähtökohtiin, toteutukseen sekä toimivuuden ja käyttökokemusten seurantaan. Onnistunut ikkunaremontti on sellainen, johon ollaan tyytyväisiä vielä vuosien päästä. Sen perustana on perehtyminen nykytilaan, huolellinen toteutus ja kunnossapito, joita tässä kirjassa tuodaan esille monesta näkökulmasta."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2002-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813851542,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2020-08-11T09:10:31Z","harvestDate":"2021-07-10T12:31:21.927Z","identifier":"oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/72bdf3be-eec3-4fbf-89bd-b0d9bbf5b523","50|355e65625b88::4349eedb466e22fa44b3fe1e0380e0a7"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Rakennustieto oy"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Hemmilä , K & Saarni , R 2002 , Ikkunaremontti . 
Rakennustieto oy , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Ikkunaremontti"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Window renovation"}]} +{"author":[{"fullname":"Pavlic, Davor","name":"Davor","pid":[],"rank":1,"surname":"Pavlic"},{"fullname":"Pulkkinen, Antti","name":"Antti","pid":[],"rank":2,"surname":"Pulkkinen"},{"fullname":"Riitahuhta, Asko","name":"Asko","pid":[],"rank":3,"surname":"Riitahuhta"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"dateofcollection":"2021-07-10T12:34:48.501Z","dateoftransformation":"2021-07-10T13:26:46.605Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2006-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly 
magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813869068,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-26T12:03:33Z","harvestDate":"2021-07-10T12:34:48.501Z","identifier":"oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/22f1944a-0af1-4774-86fe-27cd45ddc6ea","50|355e65625b88::53dda1c97d9a6da19a97b3c3aadaa3a0"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Design Society"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Pavlic , D , Pulkkinen , A & Riitahuhta , A 2006 , A conceptual framework of product family architecture . in DS 47: Proceedings of NordDesign 2006 Conference, Rejkjavik, Iceland, 16.-18.08.2006 . Design Society , pp. 212-222 . 
< https://www.designsociety.org/publication/24280/A+Conceptual+Framework+of+Product+Family+Architecture >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"A conceptual framework of product family architecture"}]} +{"author":[{"fullname":"Carpen, Leena","name":"Leena","pid":[],"rank":1,"surname":"Carpen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.079Z","dateoftransformation":"2021-07-10T13:36:20.24Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/1f27e75d-365e-4810-be48-ef60378d8279"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813880572,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-05-17T10:37:19Z","harvestDate":"2021-07-10T12:27:29.079Z","identifier":"oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/1f27e75d-365e-4810-be48-ef60378d8279","50|355e65625b88::5ef53b02a53fa87a5eb236484d9a011d"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Carpen , L 1999 , ' Case: microbial induced corrosion in paper machines ' , RenhetsTeknik , no. 2 , 16 ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Case: microbial induced corrosion in paper machines"}]} +{"author":[{"fullname":"Tuomola, Tuomas","name":"Tuomas","pid":[],"rank":1,"surname":"Tuomola"},{"fullname":"Siitonen, Veijo","name":"Veijo","pid":[],"rank":2,"surname":"Siitonen"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"dateofcollection":"2021-07-10T12:30:02.598Z","dateoftransformation":"2021-07-10T13:50:33.549Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1970-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/56ed9ab2-3b3c-4829-a63b-52311969dd30"]}],"language":{"classid":"fin","classname":"Finnish","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813901871,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-09-27T09:19:12Z","harvestDate":"2021-07-10T12:30:02.598Z","identifier":"oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/56ed9ab2-3b3c-4829-a63b-52311969dd30","50|355e65625b88::73a86fe5bef6d4fae3caf02f13840439"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"VTT Technical Research Centre of Finland"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Tuomola , T & Siitonen , V 1970 
, Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968 . Valtion teknillinen tutkimuslaitos: Lämpöteknillinen laboratorio. Tiedonanto , no. 4 , VTT Technical Research Centre of Finland , Helsinki ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Koulujen lämmitys ja ilmanvaihto. Osa 2. Eräitä LI-järjestelmien käyttökokeita vuosina 1965-1968"}]} +{"author":[{"fullname":"Paakkari, Jussi","name":"Jussi","pid":[],"rank":1,"surname":"Paakkari"},{"fullname":"Ailisto, Heikki","name":"Heikki","pid":[],"rank":2,"surname":"Ailisto"},{"fullname":"Niskala, Matti","name":"Matti","pid":[],"rank":3,"surname":"Niskala"},{"fullname":"Mäkäräinen, Masa","name":"Masa","pid":[],"rank":4,"surname":"Mäkäräinen"},{"fullname":"Väinämö, Kauko","name":"Kauko","pid":[],"rank":5,"surname":"Väinämö"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"dateofcollection":"2021-07-10T12:27:29.082Z","dateoftransformation":"2021-07-10T13:56:12.148Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1999-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly 
magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/33cedb13-48f3-4326-b5c3-18d248374378"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813911302,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-29T02:07:50Z","harvestDate":"2021-07-10T12:27:29.082Z","identifier":"oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::7cfd7ecc53246b72e306a5057e4487b3","oai:cris.vtt.fi:publications/33cedb13-48f3-4326-b5c3-18d248374378"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Research and Development Centre of Kajaani"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Paakkari , J , Ailisto , H , Niskala , M , Mäkäräinen , M & Väinämö , K 1999 , Machine vision guided waterjet cutting . in 3rd International Symposium on Optics in Engineering. Kajaani, FI, 19 - 21 Jan. 1999 . 
Research and Development Centre of Kajaani , Kajaani ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Machine vision guided waterjet cutting"}]} +{"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . 
European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} +{"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . 
https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} +{"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. 
The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . 
< https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. 
Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; 
Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 
25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, 
Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. 
Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} +{"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} +{"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem 
italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 327cddde336a2a590efeba3a76febb1cec36711b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:44:13 +0200 Subject: [PATCH 024/166] Hosted By Map - refactoring --- .../graph/hostedbymap/model/EntityInfo.java | 97 +++++++++---------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 7bf9d0598..1facaa792 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -1,69 +1,68 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap.model; import java.io.Serializable; public class EntityInfo implements Serializable { - private String id; - private String journal_id; - private String name; - private Boolean openaccess; - private String hb_id; + private String id; + private String 
journal_id; + private String name; + private Boolean openaccess; + private String hb_id; + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } - public String getId() { - return id; - } + public String getJournal_id() { + return journal_id; + } - public void setId(String id) { - this.id = id; - } + public void setJournal_id(String journal_id) { + this.journal_id = journal_id; + } - public String getJournal_id() { - return journal_id; - } + public String getName() { + return name; + } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; - } + public void setName(String name) { + this.name = name; + } - public String getName() { - return name; - } + public Boolean getOpenaccess() { + return openaccess; + } - public void setName(String name) { - this.name = name; - } + public void setOpenaccess(Boolean openaccess) { + this.openaccess = openaccess; + } - public Boolean getOpenaccess() { - return openaccess; - } + public String getHb_id() { + return hb_id; + } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; - } + public void setHb_id(String hb_id) { + this.hb_id = hb_id; + } - public String getHb_id() { - return hb_id; - } + public static EntityInfo newInstance(String id, String j_id, String name) { + return newInstance(id, j_id, name, false); - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } + } - public static EntityInfo newInstance(String id, String j_id, String name){ - return newInstance(id, j_id, name, false); + public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journal_id = j_id; + pi.name = name; + pi.openaccess = openaccess; + pi.hb_id = ""; + return pi; - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess){ - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - - } + } } From 461b8a29a0e52d92cb12aaec7e481844f9124c51 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 10:46:51 +0200 Subject: [PATCH 025/166] removed not needed class --- .../hostedbymap/model/DatasourceInfo.java | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java deleted file mode 100644 index f5ac8f70c..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DatasourceInfo.java +++ /dev/null @@ -1,72 +0,0 @@ -package eu.dnetlib.dhp.oa.graph.hostedbymap.model; - -import java.io.Serializable; - -public class DatasourceInfo implements Serializable { - private String id; - private String officialname; - private String issn; - private String eissn; - private String lissn; - private Boolean openAccess; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getOfficialname() { - return officialname; - } - - public void setOfficialname(String officialname) { - this.officialname = officialname; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - 
this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getLissn() { - return lissn; - } - - public void setLissn(String lissn) { - this.lissn = lissn; - } - - public Boolean getOpenAccess() { - return openAccess; - } - - public void setOpenAccess(Boolean openAccess) { - this.openAccess = openAccess; - } - - public static DatasourceInfo newInstance(String id, String officialname, String issn, String eissn, String lissn){ - DatasourceInfo di = new DatasourceInfo(); - di.id = id; - di.officialname = officialname; - di.issn = (issn != null) ? issn : "" ; - di.eissn = (eissn != null) ? eissn:""; - di.lissn = (lissn != null) ? lissn : ""; - di.openAccess = false; - - return di; - } -} From 9831725073e21ed89e014072c92cc0fc57642cee Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 3 Aug 2021 11:02:17 +0200 Subject: [PATCH 026/166] Hosted By Map - remove from workflow a step not needed. The hbm will be take care also of the integration of the unibi list of gold openaccess journals --- .../dhp/doiboost/preprocess/oozie_app/workflow.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 4de1a2185..de433b038 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,7 +63,6 @@ - ${wf:conf('resumeFrom') eq 'DownloadGoldIssn'} ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} @@ -77,18 +76,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - eu.dnetlib.doiboost.GetCSV - --hdfsNameNode${nameNode} - --fileURL${unibiGoldIssnFileURL} - --hdfsPath${hdfsPath} - --classForNameeu.dnetlib.doiboost.UnibiGoldModel - - - - - From a7bf314fd2a8c0fb1075eb2738023e7d802d0364 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:13:30 +0200 Subject: [PATCH 027/166] Hosted By Map - added new aggregator to get just one result per datasource id --- .../oa/graph/hostedbymap/Aggregators.scala | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index af4ac4507..8198b197a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -103,4 +103,40 @@ object Aggregators { transformedData } + def datasourceToSingleIdAggregator: TypedColumn[EntityInfo, EntityInfo] = new Aggregator[EntityInfo, EntityInfo, EntityInfo]{ + override def zero: EntityInfo = EntityInfo.newInstance("","","") + + override def reduce(b: EntityInfo, a:EntityInfo): EntityInfo = { + return merge(b, a) + } + override def merge(b1: EntityInfo, b2: EntityInfo): EntityInfo = { + if (b1 == null){ + return b2 + } + if(b2 == null){ + return b1 + } + if(!b1.getHb_id.equals("")){ + return b1 + } + b2 + + } + override def finish(reduction: 
EntityInfo): EntityInfo = reduction + override def bufferEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + + override def outputEncoder: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + }.toColumn + + + def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { + val transformedData : Dataset[EntityInfo] = df + .groupByKey(_.getHb_id)(Encoders.STRING) + .agg(Aggregators.datasourceToSingleIdAggregator) + .map{ + case (id:String , res: EntityInfo) => res + }(Encoders.bean(classOf[EntityInfo])) + + transformedData + } } From 8f7623e77afe2edce3681a81a0283f007f733c9d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:20 +0200 Subject: [PATCH 028/166] Hosted By Map - refactoring and application of the new aggregator --- .../SparkApplyHostedByMapToDatasource.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 4ecea63f3..fad313f1c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -45,7 +45,7 @@ object SparkApplyHostedByMapToDatasource { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -55,17 +55,15 @@ object SparkApplyHostedByMapToDatasource { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val dats : Dataset[Datasource] = spark.read.textFile("$graphPath/datasource") + val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") .map(r => mapper.readValue(r, classOf[Datasource])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") - .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - - + val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) + .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) //c. 
dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(s"$graphPath/datasource") + applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } From 8ba8c77f92a82104076649fb3ba4fa80d13fd449 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:14:57 +0200 Subject: [PATCH 029/166] Hosted By Map - refactoring --- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 37afc319a..312513285 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -29,7 +29,6 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) inst.getHostedby.setValue(ei.getName) if (ei.getOpenaccess) { @@ -60,7 +59,7 @@ object SparkApplyHostedByMapToResult { val graphPath = parser.get("graphPath") val outputPath = parser.get("outputPath") - val workingPath = parser.get("workingPath") + val preparedInfoPath = parser.get("preparedInfoPath") implicit val formats = DefaultFormats @@ -70,15 +69,15 @@ object SparkApplyHostedByMapToResult { implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) val mapper = new ObjectMapper() - val pubs : Dataset[Publication] = spark.read.textFile("$graphPath/publication") + val pubs : Dataset[Publication] = spark.read.textFile(graphPath + "/publication") .map(r => mapper.readValue(r, classOf[Publication])) - val pinfo : Dataset[EntityInfo] = spark.read.textFile("$workingPath/preparedInfo") + val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) //a. 
publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json("$graphPath/publication") + applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) From 1e952cccf644a27180684b62851171ccdfd313d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:15:43 +0200 Subject: [PATCH 030/166] Hosted By Map - refactoring and deletion of not needed methods --- .../SparkPrepareHostedByInfoToApply.scala | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 7395b2450..d342d57b0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap import com.fasterxml.jackson.databind.ObjectMapper import eu.dnetlib.dhp.application.ArgumentApplicationParser -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.{DatasourceInfo, EntityInfo} +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.EntityInfo import eu.dnetlib.dhp.schema.oaf.{Journal, Publication} import org.apache.commons.io.IOUtils @@ -17,8 +17,6 @@ import org.slf4j.{Logger, LoggerFactory} object SparkPrepareHostedByInfoToApply { - - implicit val mapEncoderDSInfo: Encoder[DatasourceInfo] = Encoders.bean(classOf[DatasourceInfo]) implicit val mapEncoderPInfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) def getList(id: String, j: Journal, name: String ) : List[EntityInfo] = { @@ -58,33 +56,6 @@ object SparkPrepareHostedByInfoToApply { toEntityItem(c.keys.head, c.values.head) } - def explodeJournalInfo(input: DatasourceInfo): List[EntityInfo] = { - var lst : List[EntityInfo] = List() - if (input.getEissn != null && !input.getEissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getEissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getLissn != null && !input.getLissn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getLissn, input.getOfficialname, input.getOpenAccess) :: lst - } - if (input.getIssn != null && !input.getIssn.equals("")){ - lst = EntityInfo.newInstance(input.getId, input.getIssn, input.getOfficialname, input.getOpenAccess) :: lst - } - - lst - } - - def joinDsandHBM(left:Dataset[DatasourceInfo], right:Dataset[HostedByItemType]): Dataset[EntityInfo] = { - left.joinWith(right, - left.col("id").equalTo(right.col("id")), "left") - .map(t2 => { - val dsi : DatasourceInfo = t2._1 - if(t2._2 != null){ - val hbi : HostedByItemType = t2._2 - dsi.setOpenAccess(hbi.openAccess) - } - dsi - }).flatMap(explodeJournalInfo) - } def toEntityItem(journal_id: String , hbi: HostedByItemType): EntityInfo = { @@ -123,7 +94,7 @@ object SparkPrepareHostedByInfoToApply { val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") + val outputPath = parser.get("preparedInfoPath") val hostedByMapPath = parser.get("hostedByMapPath") @@ -139,7 +110,7 @@ object 
SparkPrepareHostedByInfoToApply { val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) //STEP2: creare la mappa publication id issn, eissn, lissn esplosa - val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, "$graphPath/publication") + val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource From eccf3851b0352b557a4cce3dbd90d0fcf8ee3408 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:16:30 +0200 Subject: [PATCH 031/166] Hosted By Map - refactoring --- .../dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index ca5e82a4a..de41ebf28 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -23,10 +23,6 @@ object SparkProduceHostedByMap { implicit val tupleForJoinEncoder: Encoder[(String, HostedByItemType)] = Encoders.tuple(Encoders.STRING, Encoders.product[HostedByItemType]) - - - - def toHostedByItemType(input: ((HostedByInfo, HostedByInfo), HostedByInfo)) : HostedByItemType = { val openaire: HostedByInfo = input._1._1 val doaj: HostedByInfo = input._1._2 @@ -217,7 +213,7 @@ object SparkProduceHostedByMap { .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) .flatMap(hbi => toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) - .rdd.saveAsTextFile(outputPath + "/hostedByMap", classOf[GzipCodec]) + .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) } From 67ba4c40e05bc90da931f7a621e11038849453db Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:17:28 +0200 Subject: [PATCH 032/166] Hosted By Map - added parameter resources --- .../hostedbymap/hostedby_apply_params.json | 36 ++++++++++++++++++ .../hostedbymap/hostedby_prepare_params.json | 37 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json new file mode 100644 index 000000000..e34398290 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_apply_params.json @@ -0,0 +1,36 @@ + +[ + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark 
session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + { + "paramName": "out", + "paramLongName": "outputPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json new file mode 100644 index 000000000..9809d71b5 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/hostedby_prepare_params.json @@ -0,0 +1,37 @@ + +[ + + { + "paramName":"hbmp", + "paramLongName":"hostedByMapPath", + "paramDescription": "the path to the datasource ", + "paramRequired": true + }, + + { + "paramName":"pip", + "paramLongName":"preparedInfoPath", + "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + }, + { + "paramName": "m", + "paramLongName": "master", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + }, + + { + "paramName": "gp", + "paramLongName": "graphPath", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": true + } +] From e94ae0b1de11f31dad6367605538371d9fe13d6b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:18:11 +0200 Subject: [PATCH 033/166] Hosted By Map - extention of the workflow to consider also the application of the map to publications and datasources --- .../graph/hostedbymap/oozie_app/workflow.xml | 106 ++++++++++++++++-- 1 file changed, 99 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index ecf6c3b31..30e500da3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -9,6 +9,10 @@ outputPath the output path
+ + hostedByMapPath + the output path + sparkDriverMemory memory for driver process @@ -65,23 +69,30 @@ - + + + + + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + - - + + - @@ -134,13 +145,94 @@ --datasourcePath${sourcePath}/datasource --workingPath${workingDir} - --outputPath${outputPath} + --outputPath${hostedByMapPath} --masteryarn-cluster - + + + + yarn-cluster + Prepare info to apply the hbm + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkPrepareHostedByInfoToApply + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --hostedByMapPath${hostedByMapPath} + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + + + + + + yarn-cluster + Apply hbm to result + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToResult + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/publication + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + + + yarn-cluster + Apply hbm to datasource + eu.dnetlib.dhp.oa.graph.hostedbymap.SparkApplyHostedByMapToDatasource + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --outputPath${outputPath}/datasource + --preparedInfoPath${workingDir}/preparedInfo + --graphPath${sourcePath} + --masteryarn-cluster + + + + + + From eb8c3f85944c8b915d455270d9a3ec2e26047244 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:19:17 +0200 Subject: [PATCH 034/166] Hosted By Map - test modified because of the application of the new aggregator on datasources --- .../java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index b78eb978b..c48c3b35d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -98,7 +98,7 @@ class 
TestApply extends java.io.Serializable{ val dats_ds :Dataset[Datasource] = spark.read.textFile(dats).map(p => mapper.readValue(p, classOf[Datasource])) - val hbm_ds :Dataset[EntityInfo] = spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo])) + val hbm_ds :Dataset[EntityInfo] = Aggregators.datasourceToSingleId(spark.read.textFile(hbm).map(p => mapper.readValue(p, classOf[EntityInfo]))) assertEquals(10, dats_ds.count()) @@ -122,7 +122,7 @@ class TestApply extends java.io.Serializable{ }else{ assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) - assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassid)) + assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) } }) From c7b71647c63cab956cb183775752df7c2b9d9b02 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 10:20:02 +0200 Subject: [PATCH 035/166] Hosted By Map - modification of the resource for testing the presence of only one entry per datasource id --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 6e1cce3b6..64e2433ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,2 +1,3 @@ {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} {"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} From 1965e4eece11830cbf373b5345030c1198f72fb8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 4 Aug 2021 18:29:03 +0200 Subject: [PATCH 036/166] new workflow for downloading the dump of crossref and unpack it --- .../oozie_app/config-default.xml | 42 ++++++ .../downloadandunpack/oozie_app/download.sh | 2 + .../downloadandunpack/oozie_app/mock.sh | 2 + .../downloadandunpack/oozie_app/workflow.xml | 121 ++++++++++++++++++ .../doiboost/preprocess/oozie_app/download.sh | 2 +- 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml new file mode 100644 index 000000000..508202e30 --- /dev/null +++ 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh new file mode 100644 index 000000000..1bb7aff1f --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh new file mode 100644 index 000000000..30386d613 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml new file mode 100644 index 000000000..91de3bfb3 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml @@ -0,0 +1,121 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + 
+ --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh index dfb0db708..30386d613 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh @@ -1,2 +1,2 @@ -#!bin/bash +#!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 5faeefbda8160685a16d7628d68fa534f03146c7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:54:03 +0200 Subject: [PATCH 037/166] added script to download the dump,changed the workflow input paramenters --- .../crossref/ExtractCrossrefRecords.java | 8 +- .../oozie_app/download.sh | 2 + .../oozie_app/workflow.xml | 119 +++++++++--------- 3 files changed, 68 insertions(+), 61 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7cae1fcb..c7d3770a0 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -29,16 +29,16 @@ public class ExtractCrossrefRecords { "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); - final String workingPath = parser.get("workingPath"); + final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); final String outputPath = parser.get("outputPath"); final String crossrefFileNameTarGz = parser.get("crossrefFileNameTarGz"); - Path hdfsreadpath = new Path(hdfsServerUri.concat(crossrefFileNameTarGz)); + Path hdfsreadpath = new Path(workingPath.concat("/").concat(crossrefFileNameTarGz)); Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); + conf.set("fs.defaultFS", workingPath); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs = FileSystem.get(URI.create(hdfsServerUri.concat(workingPath)), conf); + FileSystem fs = FileSystem.get(URI.create(workingPath), conf); FSDataInputStream crossrefFileStream = fs.open(hdfsreadpath); try (TarArchiveInputStream tais = new TarArchiveInputStream( new GzipCompressorInputStream(crossrefFileStream))) { diff --git 
a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh new file mode 100644 index 000000000..662f5313c --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -0,0 +1,2 @@ +#!/bin/bash +curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 506d86a08..2d3ad6bc3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -1,13 +1,5 @@ - + - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - sparkDriverMemory memory for driver process @@ -18,27 +10,82 @@ sparkExecutorCores - 2 number of cores used by single executor + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + - + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + ${jobTracker} ${nameNode} eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz + --crossrefFileNameTarGz${crossrefdumpfilename} --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ + --outputPath${crossrefDumpPath}/files/ @@ -48,7 +95,7 @@ yarn-cluster cluster - SparkGenerateCrossrefDataset + SparkUnpackCrossrefEntries eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries dhp-doiboost-${projectVersion}.jar @@ -63,56 +110,14 @@ --masteryarn-cluster --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates + --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - \ No newline at end of file From 
bd096f51706b60f740430c007a9d3c67ec66d519 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 5 Aug 2021 10:55:43 +0200 Subject: [PATCH 038/166] removed not needed param file --- .../generate_dataset_params.json | 21 ------------------- 1 file changed, 21 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e080337..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file From b7079804cb4e55b1d027c5e1857bfc8f000055d7 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:34:35 +0200 Subject: [PATCH 039/166] CrossrefDump - put token among the parameters --- .../doiboost/crossref_dump_reader/oozie_app/workflow.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 2d3ad6bc3..6e4f17912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -22,6 +22,10 @@ crossrefDumpPath the Crossref dump path + + crossrefdumptoken + the token for the API dump path + @@ -37,7 +41,7 @@ - + @@ -69,6 +73,7 @@ ${url} ${crossrefDumpPath} ${crossrefdumpfilename} + ${crossrefdumptoken} HADOOP_USER_NAME=${wf:user()} download.sh From 54a6cbb24497ae4aa52b93a33f961ce6d571bcfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:41:10 +0200 Subject: [PATCH 040/166] CrossrefDump - put token among the parameters --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 662f5313c..2ce704283 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put 
$2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file From 964f97ed4d16db8a95555a7051f1a2d18181c418 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:06 +0200 Subject: [PATCH 041/166] cleanup --- dhp-workflows/pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/pom.xml b/dhp-workflows/pom.xml index cfdf36573..22ee77619 100644 --- a/dhp-workflows/pom.xml +++ b/dhp-workflows/pom.xml @@ -24,8 +24,6 @@ dhp-dedup-openaire dhp-enrichment dhp-graph-provision - - dhp-blacklist dhp-stats-update dhp-stats-promote From da20fceaf7aefbee08844e9257b47ba97e5e2961 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 11:53:45 +0200 Subject: [PATCH 042/166] removed all the part related to the crossref dump download since it is done in a separate workflow --- .../generate_dataset_params.json | 21 --- .../oozie_app/config-default.xml | 42 ------ .../oozie_app/workflow.xml | 118 ----------------- .../oozie_app/config-default.xml | 42 ------ .../downloadandunpack/oozie_app/download.sh | 2 - .../downloadandunpack/oozie_app/mock.sh | 2 - .../downloadandunpack/oozie_app/workflow.xml | 121 ------------------ .../doiboost/preprocess/oozie_app/download.sh | 2 - .../preprocess/oozie_app/workflow.xml | 76 +---------- 9 files changed, 2 insertions(+), 424 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml delete mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json deleted file mode 100644 index 63e080337..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "paramName": "s", - "paramLongName": "sourcePath", - "paramDescription": "the source mdstore path", - "paramRequired": true - }, - - { - "paramName": "t", - "paramLongName": "targetPath", - "paramDescription": "the target mdstore path", - "paramRequired": true - }, - { - "paramName": "m", - "paramLongName": "master", - "paramDescription": "the master name", - "paramRequired": true - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml deleted file mode 100644 index 506d86a08..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - crossrefDumpPath - the working dir base path - - - inputPathCrossref - the working dir base path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - 2 - number of cores used by single executor - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefDumpPath}/crossref.tar.gz - --workingPath${crossrefDumpPath} - --outputPath${workingDir}/files/ - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${inputPathCrossref}/crossref_ds - - - - - - - - - yarn-cluster - cluster - SparkGenerateCrossrefDataset - eu.dnetlib.doiboost.crossref.GenerateCrossrefDataset - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${inputPathCrossref}/crossref_ds - --targetPath${inputPathCrossref}/crossref_ds_updates - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of 
file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml deleted file mode 100644 index 508202e30..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/config-default.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - oozie.launcher.mapreduce.user.classpath.first - true - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - "com.cloudera.spark.lineage.NavigatorAppListener" - - - spark2SqlQueryExecutionListeners - "com.cloudera.spark.lineage.NavigatorQueryListener" - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh deleted file mode 100644 index 1bb7aff1f..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwOi8vY3Jvc3NyZWYub3JnLyIsImF1ZCI6Im1kcGx1cyIsImp0aSI6Ijk3YTZkNGVkLTg5MjktNGQ2Yi05NWY1LTY2YmMyNDgzNTRjNCJ9.5DPM4gRibUBYBtrUSpRz3RGHYVB-8f61jQBW_q-r-hs" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh deleted file mode 100644 index 30386d613..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/mock.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml deleted file mode 100644 index 91de3bfb3..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/downloadandunpack/oozie_app/workflow.xml +++ /dev/null @@ -1,121 +0,0 @@ - - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - - - - crossrefdumpfilename - the Crossref input path - - - crossrefDumpPath - the Crossref dump path - - - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - HADOOP_USER_NAME=${wf:user()} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - 
eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh deleted file mode 100644 index 30386d613..000000000 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/download.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -curl -LSs $1 | hdfs dfs -put - $2/$3 \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index ecaeda709..3700ce5d9 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -63,14 +63,10 @@ - ${wf:conf('resumeFrom') eq 'Skip'} - ${wf:conf('resumeFrom') eq 'ImportCrossRef'} - ${wf:conf('resumeFrom') eq 'UnpackCrossrefEntries'} - ${wf:conf('resumeFrom') eq 'GenerateCrossrefDataset'} ${wf:conf('resumeFrom') eq 'ResetMagWorkingPath'} ${wf:conf('resumeFrom') eq 'ConvertMagToDataset'} ${wf:conf('resumeFrom') eq 'PreProcessORCID'} - + @@ -79,67 +75,6 @@ - - - ${jobTracker} - ${nameNode} - - - mapred.job.queue.name - ${queueName} - - - download.sh - ${url} - ${crossrefDumpPath} - ${crossrefdumpfilename} - download.sh - - - - - - - - - ${jobTracker} - ${nameNode} - eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords - --hdfsServerUri${nameNode} - --crossrefFileNameTarGz${crossrefdumpfilename} - --workingPath${crossrefDumpPath} - --outputPath${crossrefDumpPath}/files/ - - - - - - - - yarn-cluster - cluster - SparkUnpackCrossrefEntries - eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries - dhp-doiboost-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.sql.shuffle.partitions=3840 - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --masteryarn-cluster - --sourcePath${crossrefDumpPath}/files - --targetPath${crossrefDumpPath}/crossref_unpack/ - - - - - - yarn-cluster @@ -166,14 +101,7 @@ - - - - - - - - + From 
577f3b1ac8696abf405de9969f4efbd81d8bba95 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Mon, 9 Aug 2021 11:53:58 +0200 Subject: [PATCH 043/166] added dnet workflows responsible for the graph construction, enrichment, provision --- .../actionset_bipFinderScores.xml | 100 ++ .../dhp/actionmanager/actionset_datacite.xml | 144 +++ .../dhp/actionmanager/actionset_doiboost.xml | 200 ++++ .../actionset_h2020_classification.xml | 132 +++ .../actionset_orcidworks-no-doi.xml | 101 ++ .../dhp/actionmanager/actionset_ror.xml | 89 ++ .../dhp/provision/00_beta_graph_for_IIS.xml | 628 +++++++++++ .../dhp/provision/00_prod_graph_for_IIS.xml | 437 ++++++++ .../eu/dnetlib/dhp/provision/01_IIS.xml | 225 ++++ .../dnetlib/dhp/provision/02_beta_graph.xml | 995 ++++++++++++++++++ .../dnetlib/dhp/provision/02_prod_graph.xml | 778 ++++++++++++++ .../dnetlib/dhp/provision/03_graph2hive.xml | 74 ++ .../dnetlib/dhp/provision/04_graph2solr.xml | 99 ++ .../dnetlib/dhp/provision/05_graph2stats.xml | 100 ++ .../dhp/provision/06_publish_stats.xml | 87 ++ .../eu/dnetlib/dhp/provision/07_broker.xml | 131 +++ 16 files changed, 4320 insertions(+) create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml create mode 100644 dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml new file mode 100644 index 000000000..e4680a0cf --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Import bipFinder scores + Import bipFinder scores + 30 + + + declares the path holding the BIP SCORE data + + bipScorePath + /data/bip/20201206 + + + + + + + declares the path holding the LATEST GRAPH dump + + latestGraphPath + /tmp/stable_ids/graph/14_graph_blacklisted + + + + + + + prepare action sets + + + [ + { + 'set' : 'bipfinder-scores', + 'jobProperty' : 'export_action_set_bipfinder-scores', + 'enablingProperty' : 'active_bipfinder-scores', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare AS for the bipFinder scores integration + + executeOozieJob + IIS + + { + 'bipScorePath':'bipScorePath', + 'inputPath':'latestGraphPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/bipfinder/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bipfinder' + } + + build-report + + + + + + + update action sets + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml new file mode 100644 index 000000000..d2ea9d35f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml @@ -0,0 +1,144 @@ + +
+ + + + + +
+ + Import Datacite ActionSet + Import InfoSpace + 30 + + + set the resume from + + resumeFrom + TransformDatacite + + + + + + + shall the datacite mapping produce the links? + + exportLinks + false + + + + + + + set the path storing the OAF Datacite records + + oafTargetPath + /data/datacite/production/datacite_oaf + + + + + + + set the input path for Datacite content + + datacitePath + /data/datacite + + + + + + + prepare action sets + + + [ + { + 'set' : 'datacite', + 'jobProperty' : 'export_action_set_datacite', + 'enablingProperty' : 'active_datacite', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'mainPath' : 'datacitePath', + 'oafTargetPath' : 'oafTargetPath', + 'exportLinks' : 'exportLinks', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_import/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + prepare a new version of Datacite ActionSet + + executeOozieJob + IIS + + { + 'sourcePath' : 'oafTargetPath', + 'outputPath' : 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/datacite_actionset/oozie_app', + 'sparkExecutorMemory' : '7G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210723_163342_752 + 2021-07-23T16:44:05+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml new file mode 100644 index 000000000..ce9eb8f4c --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml @@ -0,0 +1,200 @@ + +
+ + + + + +
+ + Import DOIboost + Import InfoSpace + 30 + + + set the input path for MAG + + MAGDumpPath + /data/doiboost/mag-2021-02-15 + + + + + + + set the input path for CROSSREF dump + + crossrefDumpPath + /data/doiboost/crossref/ + + + + + + + set the intermediate path used to process MAG + + intermediatePathMAG + /data/doiboost/input/mag + + + + + + + set the input path for Crossref + + inputPathCrossref + /data/doiboost/input/crossref + + + + + + + set the timestamp for the Crossref incremental harvesting + + crossrefTimestamp + 1607614921429 + + + + + + + set the input path for UnpayWall + + inputPathUnpayWall + /data/doiboost/input/unpayWall + + + + + + + set the input path for ORCID + + inputPathOrcid + /data/orcid_activities_2020/last_orcid_dataset + + + + + + + set the working path for ORCID + + workingPathOrcid + /data/doiboost/input/orcid + + + + + + + set the hostedBy map path + + hostedByMapPath + /data/doiboost/input/hostedBy/hbMap.gz + + + + + + + set the oozie workflow name from which the execution will be resumed + + resumeFrom + ConvertCrossrefToOAF + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'doiboost', + 'jobProperty' : 'export_action_set_doiboost', + 'enablingProperty' : 'active_doiboost', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare a new version of DOIBoost + + executeOozieJob + IIS + + { + 'crossrefTimestamp' : 'crossrefTimestamp', + 'hostedByMapPath' : 'hostedByMapPath', + 'MAGDumpPath' :'MAGDumpPath', + 'inputPathMAG' : 'intermediatePathMAG', + 'inputPathCrossref' : 'inputPathCrossref', + 'crossrefDumpPath':'crossrefDumpPath', + 'inputPathUnpayWall' : 'inputPathUnpayWall', + 'inputPathOrcid' : 'inputPathOrcid', + 'outputPath' : 'outputPath', + 'workingPathOrcid':'workingPathOrcid', + 'resumeFrom' : 'resumeFrom' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/doiboost_process/oozie_app', + 'workingPath' : '/data/doiboost/process_p', + 'sparkExecutorCores' : '2', + 'sparkExecutorIntersectionMemory' : '12G', + 'sparkExecutorMemory' : '8G', + 'esServer' : '[es_server]', + 'esIndex' : 'crossref' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210714_075237_381 + 2021-07-14T09:51:46+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml new file mode 100644 index 000000000..6d29e25a1 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml @@ -0,0 +1,132 @@ + +
+ + + + + +
+ + Import H2020classification + Import H2020classification + 30 + + + sets the URL to download the project file + + projectFileURL + https://cordis.europa.eu/data/cordis-h2020projects.csv + + + + + + + sets the URL to download the programme file + + programmeFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv + + + + + + + sets the URL to download the topics file + + topicFileURL + https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx + + + + + + + sets the name of the sheet in the topic file to be read + + sheetName + Topics + + + + + + + wait configurations + + + + + + + prepare action sets + + + [ + { + 'set' : 'h2020classification', + 'jobProperty' : 'export_action_set_h2020classification', + 'enablingProperty' : 'active_h2020classification', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the H2020 Classification + + executeOozieJob + IIS + + { + 'outputPath': 'outputPath', + 'sheetName':'sheetName', + 'projectFileURL' : 'projectFileURL', + 'programmeFileURL' : 'programmeFileURL', + 'topicFileURL':'topicFileURL' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/project/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/h2020classification', + 'postgresURL':'', + 'postgresUser':'', + 'postgresPassword':'' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210524_084803_740 + 2021-05-24T09:05:50+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml new file mode 100644 index 000000000..c5642dadc --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml @@ -0,0 +1,101 @@ + +
+ + + + + +
+ + Import Orcid + Import InfoSpace + 30 + + + set the hdfs input path + + inputPath + /data/orcid_activities_2020 + + + + + + + set the temporary path where to store the action set + + processOutputPath + /tmp/prod_provision/working_path_orcid_activities + + + + + + + prepare action sets + + + [ + { + 'set' : 'orcidworks-no-doi', + 'jobProperty' : 'export_action_set_orcidworks_no_doi', + 'enablingProperty' : 'active_orcidworks_no_doi', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + prepare updates for the Orcid No Doi + + executeOozieJob + IIS + + { + 'workingPath' : 'inputPath', + 'processOutputPath' : 'processOutputPath', + 'outputPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/orcidnodoi_actionset/oozie_app', + 'spark2GenNoDoiDatasetMaxExecutors' : '200', + 'spark2GenNoDoiDatasetExecutorMemory' : '2G' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210713_170819_470 + 2021-07-13T17:28:26+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml new file mode 100644 index 000000000..4810fda3b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml @@ -0,0 +1,89 @@ + +
+ + + + + +
+ + Update ROR actionset + Import Infospace + 30 + + + Set the path of the ROR JSON dump to be imported + + inputPath + /data/ror/ror-data-2021-04-06.json + + + + + + + prepare action sets + + + [ + { + 'set' : 'ror', + 'jobProperty' : 'export_action_set_ror', + 'enablingProperty' : 'active_ror', + 'enabled' : 'true' + } + ] + + + + + + + + extract the hdfs output path generated in the previous node + + outputPath + + + + + + + update the ROR actionset + + executeOozieJob + IIS + + { + 'rorJsonInputPath' : 'inputPath', + 'rorActionSetPath': 'outputPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/ror/oozie_app', + 'workingDir': '/tmp/import_ror_actionset_prod' + } + + build-report + + + + + + + update action sets + + + + + + + + wf_20210518_143542_478 + 2021-05-18T14:37:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml new file mode 100644 index 000000000..ef2205e32 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -0,0 +1,628 @@ + +
+ + + + + +
+ + Graph construction for IIS [BETA] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_inference/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_inference/graph/02_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/beta_inference/graph/03_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_inference/graph/04_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_inference/graph/05_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_inference/graph/06_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + false + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_inference/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? 
+ + shouldPatchRelations_BETA + false + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_inference/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 
'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210730_094240_462 + 2021-07-30T15:04:19+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml new file mode 100644 index 000000000..e5ce3d710 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml @@ -0,0 +1,437 @@ + +
+ + + + + +
+ + Graph construction for IIS [PROD NEW] + IIS + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_inference/graph/00_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_inference/graph/01_graph_raw + + + + + + + Set the target path to store the CLEANED graph + + cleanedFirstGraphPath + /tmp/prod_inference/graph/02_graph_clean_first + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_inference/graph/03_graph_dedup + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_inference/graph/04_graph_consistent + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_inference/graph/05_graph_cleaned + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,datacite + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + should apply the relations id patching based on the provided idMapping? 
+ + shouldPatchRelations + false + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_inference/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_inference/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean_first' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'dedupGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_inference/working_dir/dedup' + } + + build-report + + + + + + + clean the 
properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'consistentGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_inference/working_dir/clean' + } + + build-report + + + + + + + + wf_20210719_165159_86 + 2021-07-19T20:45:09+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml new file mode 100644 index 000000000..126d5f58d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml @@ -0,0 +1,225 @@ + +
+ + + + + +
+ + IIS main workflow V3 [PROD] + IIS + 30 + + + start + + + + + + + Set a regex of funder shortnames to exclude from the project reference processing + + referenceextraction_project_fundingclass_blacklist_regex + ^DFG::.*$|^CONICYT::.*$|^RSF::.*$|^SGOV::.*$|^GSRT::.*$|^MIUR::.*$|^INNOVIRIS::.*$|^RIF::.*$|^SFRS::.*$ + + + + + + + prepare action sets + + + [ + { + 'set' : 'iis-document-affiliation', + 'jobProperty' : 'export_action_set_id_matched_doc_organizations', + 'enablingProperty' : 'active_document_affiliation', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-projects-main', + 'jobProperty' : 'export_action_set_id_document_referencedProjects', + 'enablingProperty' : 'active_referenceextraction_project', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-datasets-main', + 'jobProperty' : 'export_action_set_id_document_referencedDatasets', + 'enablingProperty' : 'active_referenceextraction_dataset', + 'enabled' : 'true' + }, + { + 'set' : 'iis-researchinitiative', + 'jobProperty' : 'export_action_set_id_document_research_initiative', + 'enablingProperty' : 'active_referenceextraction_researchinitiative', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-similarities', + 'jobProperty' : 'export_action_set_id_document_similarities_standard', + 'enablingProperty' : 'active_documentssimilarity', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-classes', + 'jobProperty' : 'export_action_set_id_document_classes', + 'enablingProperty' : 'active_documentsclassification', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations', + 'jobProperty' : 'export_action_set_id_document_referencedDocuments', + 'enablingProperty' : 'active_citationmatching', + 'enabled' : 'true' + }, + { + 'set' : 'iis-document-citations-relations', + 'jobProperty' : 'export_action_set_id_citation_relations', + 'enablingProperty' : 'active_citationmatching_relations', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenceextraction-pdb', + 'jobProperty' : 'export_action_set_id_document_pdb', + 'enablingProperty' : 'active_referenceextraction_pdb', + 'enabled' : 'true' + }, + { + 'set' : 'document_software_url', + 'jobProperty' : 'export_action_set_id_document_software_url', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-software', + 'jobProperty' : 'export_action_set_id_entity_software', + 'enablingProperty' : 'active_referenceextraction_software_url', + 'enabled' : 'true' + }, + { + 'set' : 'iis-communities', + 'jobProperty' : 'export_action_set_id_document_community', + 'enablingProperty' : 'active_referenceextraction_community', + 'enabled' : 'true' + }, + { + 'set' : 'iis-referenced-patents', + 'jobProperty' : 'export_action_set_id_document_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-entities-patent', + 'jobProperty' : 'export_action_set_id_entity_patent', + 'enablingProperty' : 'active_referenceextraction_patent', + 'enabled' : 'true' + }, + { + 'set' : 'iis-covid-19', + 'jobProperty' : 'export_action_set_id_document_covid19', + 'enablingProperty' : 'active_referenceextraction_covid19', + 'enabled' : 'true' + } + ] + + + + + + + + prepare parameters + + import_islookup_service_location + import_content_objectstores_csv + import_content_object_store_location + import_mdstore_service_location + import_dataset_mdstore_ids_csv + oozie.wf.application.path + /lib/iis/primary/snapshots/2021-06-23 + IIS + /tmp/prod_inference/graph/05_graph_cleaned + 
import_infospace_graph_location + + import_project_concepts_context_ids_csv + aginfra,beopen,clarin,covid-19,dariah,dh-ch,oa-pg,egi,elixir-gr,enermaps,epos,fam,fet-fp7,fet-h2020,gotriple,instruct,mes,ni,rda,science-innovation-policy,risis,rural-digital-europe,sdsn-gr,sobigdata + + + + + + + IIS main + + iisMainJobV3 + + { + 'cluster' : 'cluster', + 'oozie.wf.application.path' : 'oozie.wf.application.path', + 'referenceextraction_project_fundingclass_blacklist_regex' : 'referenceextraction_project_fundingclass_blacklist_regex', + + 'active_document_affiliation' : 'active_document_affiliation', + 'active_referenceextraction_project' : 'active_referenceextraction_project', + 'active_referenceextraction_dataset' : 'active_referenceextraction_dataset', + 'active_referenceextraction_researchinitiative' : 'active_referenceextraction_researchinitiative', + 'active_documentsclassification' : 'active_documentsclassification', + 'active_documentssimilarity' : 'active_documentssimilarity', + 'active_citationmatching' : 'active_citationmatching', + 'active_citationmatching_relations' : 'active_citationmatching_relations', + 'active_referenceextraction_pdb' : 'active_referenceextraction_pdb', + 'active_referenceextraction_software_url' : 'active_referenceextraction_software_url', + 'active_referenceextraction_community' : 'active_referenceextraction_community', + 'active_referenceextraction_patent' : 'active_referenceextraction_patent', + 'active_referenceextraction_covid19' : 'active_referenceextraction_covid19', + + 'import_content_objectstores_csv' : 'import_content_objectstores_csv', + 'import_content_object_store_location' : 'import_content_object_store_location', + 'import_mdstore_service_location' : 'import_mdstore_service_location', + 'import_islookup_service_location' : 'import_islookup_service_location', + 'import_project_concepts_context_ids_csv' : 'import_project_concepts_context_ids_csv', + 'import_dataset_mdstore_ids_csv' : 'import_dataset_mdstore_ids_csv', + 'import_infospace_graph_location' : 'import_infospace_graph_location', + + 'export_action_set_id_matched_doc_organizations' : 'export_action_set_id_matched_doc_organizations', + 'export_action_set_id_document_referencedDatasets' : 'export_action_set_id_document_referencedDatasets', + 'export_action_set_id_document_referencedProjects' : 'export_action_set_id_document_referencedProjects', + 'export_action_set_id_document_research_initiative' : 'export_action_set_id_document_research_initiative', + 'export_action_set_id_document_similarities_standard' : 'export_action_set_id_document_similarities_standard', + + 'export_action_set_id_document_referencedDocuments' : 'export_action_set_id_document_referencedDocuments', + 'export_action_set_id_document_pdb' : 'export_action_set_id_document_pdb', + 'export_action_set_id_document_software_url' : 'export_action_set_id_document_software_url', + 'export_action_set_id_entity_software' : 'export_action_set_id_entity_software', + 'export_action_set_id_document_community' : 'export_action_set_id_document_community', + 'export_action_set_id_document_patent' : 'export_action_set_id_document_patent', + 'export_action_set_id_entity_patent' : 'export_action_set_id_entity_patent', + 'export_action_set_id_document_covid19' : 'export_action_set_id_document_covid19', + 'export_action_set_id_document_classes' : 'export_action_set_id_document_classes' + } + + false + build-report + + + + + + + update action sets + + + + + + + + wf_20210719_221139_780 + 2021-07-21T01:23:13+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml new file mode 100644 index 000000000..766783f8b --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -0,0 +1,995 @@ + +
+ + + + + +
+ + Graph Construction [BETA] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + gsrt________,rcuk________ + + + + + + + set the path of the map defining the relations id mappings + + idMappingPath + /data/maps/fct_map.json + + + + + + + Set the target path to store the MERGED graph + + mergedGraphPath + /tmp/beta_provision/graph/01_graph_merged + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/beta_provision/graph/02_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/beta_provision/graph/03_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/beta_provision/graph/04_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/beta_provision/graph/05_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/beta_provision/graph/06_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/beta_provision/graph/07_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/beta_provision/graph/08_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/beta_provision/graph/09_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/beta_provision/graph/10_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/beta_provision/graph/11_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/beta_provision/graph/12_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/beta_provision/graph/13_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/beta_provision/graph/14_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/beta_provision/graph/15_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims_PROD + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs_PROD + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims_PROD + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs_PROD + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB_PROD + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs_PROD + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF_PROD + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF_PROD + true + + + + + + + should apply the relations id patching based on the provided idMapping on PROD? + + shouldPatchRelations_PROD + true + + + + + + + set the PROD aggregator content path + + prodContentPath + /tmp/prod_aggregator_for_beta + + + + + + + Set the path containing the PROD AGGREGATOR graph + + prodAggregatorGraphPath + /tmp/beta_provision/graph/00_prod_graph_aggregator + + + + + + + reuse cached ODF claims from the BETA aggregation system + + reuseODFClaims_BETA + true + + + + + + + reuse cached ODF records on HDFS from the BETA aggregation system + + reuseODFhdfs_BETA + true + + + + + + + reuse cached OAF claims from the BETA aggregation system + + reuseOAFClaims_BETA + true + + + + + + + reuse cached OAF records on HDFS from the BETA aggregation system + + reuseOAFhdfs_BETA + true + + + + + + + reuse cached DB content from the BETA aggregation system + + reuseDB_BETA + true + + + + + + + reuse cached OpenOrgs content from the BETA aggregation system + + reuseDBOpenorgs_BETA + true + + + + + + + reuse cached ODF content from the BETA aggregation system + + reuseODF_BETA + true + + + + + + + reuse cached OAF content from the BETA aggregation system + + reuseOAF_BETA + true + + + + + + + should apply the relations id patching based on the provided idMapping on BETA? 
+ + shouldPatchRelations_BETA + true + + + + + + + set the BETA aggregator content path + + betaContentPath + /tmp/beta_aggregator + + + + + + + Set the path containing the BETA AGGREGATOR graph + + betaAggregatorGraphPath + /tmp/beta_provision/graph/00_beta_graph_aggregator + + + + + + + wait configurations + + + + + + + + create the BETA AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'betaAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_BETA', + 'reuseOAFClaims' : 'reuseOAFClaims_BETA', + 'reuseDB' : 'reuseDB_BETA', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_BETA', + 'reuseODF' : 'reuseODF_BETA', + 'reuseODF_hdfs' : 'reuseODFhdfs_BETA', + 'reuseOAF' : 'reuseOAF_BETA', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', + 'contentPath' : 'betaContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_BETA', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/beta_aggregator' + } + + build-report + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'prodAggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims_PROD', + 'reuseOAFClaims' : 'reuseOAFClaims_PROD', + 'reuseDB' : 'reuseDB_PROD', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs_PROD', + 'reuseODF' : 'reuseODF_PROD', + 'reuseODF_hdfs' : 'reuseODFhdfs_PROD', + 'reuseOAF' : 'reuseOAF_PROD', + 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', + 'contentPath' : 'prodContentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'shouldPatchRelations' : 'shouldPatchRelations_PROD', + 'idMappingPath' : 'idMappingPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/beta_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + wait configurations + + + + + + + create the AGGREGATOR graph + + executeOozieJob + IIS + + { + 'betaInputGraphPath' : 'betaAggregatorGraphPath', + 'prodInputGraphPath' : 'prodAggregatorGraphPath', + 'graphOutputPath' : 'mergedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/merge/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/merge_graph', + 'priority' : 'BETA' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'mergedGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 
'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/beta_provision/working_dir/dedup' + } + + build-report + + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/orcid', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo;isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 
'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/bulktag/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/funding', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', + 'allowedsemrels' : 'IsSupplementedBy;IsSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/beta_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 
'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210803_134357_367 + 2021-08-03T17:08:11+00:00 + SUCCESS + + + +
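Illustrative note on the bulk tagging step of the profile above: its 'pathMap' blackboard parameter ('bulkTaggingPathMap') is a JSON map from field names to JSONPath expressions that the tagging job evaluates against each result. The sketch below shows how such expressions could be evaluated, assuming the com.jayway.jsonpath library and hypothetical names; it is not the actual bulktag implementation.

    import com.jayway.jsonpath.JsonPath

    // Illustrative sketch only: evaluate the JSONPath expressions of a bulk-tagging path map
    // against the JSON serialization of a result and collect the extracted values per field.
    object PathMapSketch {

      def extract(resultJson: String, pathMap: Map[String, String]): Map[String, java.util.List[String]] =
        pathMap.map { case (field, jsonPath) =>
          // JsonPath.read returns the values matching the expression, e.g. all author full names
          val values: java.util.List[String] = JsonPath.read(resultJson, jsonPath)
          (field, values)
        }

      def main(args: Array[String]): Unit = {
        val result = """{"author":[{"fullname":"Doe, Jane"},{"fullname":"Roe, Richard"}]}"""
        val pathMap = Map("author" -> "$['author'][*]['fullname']")
        println(extract(result, pathMap)) // Map(author -> ["Doe, Jane","Roe, Richard"])
      }
    }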
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml new file mode 100644 index 000000000..be6155f2f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml @@ -0,0 +1,778 @@ + +
+ + + + + +
+ + Graph construction [PROD NEW] + Data Provision + 30 + + + set blacklist of funder nsPrefixes + + nsPrefixBlacklist + conicytf____,dfgf________,gsrt________,innoviris___,miur________,rif_________,rsf_________,sgov________,sfrs________ + + + + + + + Set the path containing the PROD AGGREGATOR graph + + aggregatorGraphPath + /tmp/prod_provision/graph/00_prod_graph_aggregator + + + + + + + Set the target path to store the RAW graph + + rawGraphPath + /tmp/prod_provision/graph/01_graph_raw + + + + + + + Set the target path to store the the consistent graph cleaned + + cleanedFirstGraphPath + /tmp/prod_provision/graph/02_graph_cleaned + + + + + + + Set the target path to store the DEDUPED graph + + dedupGraphPath + /tmp/prod_provision/graph/03_graph_dedup + + + + + + + Set the target path to store the INFERRED graph + + inferredGraphPath + /tmp/prod_provision/graph/04_graph_inferred + + + + + + + Set the target path to store the CONSISTENCY graph + + consistentGraphPath + /tmp/prod_provision/graph/05_graph_consistent + + + + + + + Set the target path to store the ORCID enriched graph + + orcidGraphPath + /tmp/prod_provision/graph/06_graph_orcid + + + + + + + Set the target path to store the BULK TAGGED graph + + bulkTaggingGraphPath + /tmp/prod_provision/graph/07_graph_bulktagging + + + + + + + Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph + + affiliationGraphPath + /tmp/prod_provision/graph/08_graph_affiliation + + + + + + + Set the target path to store the COMMUNITY from SELECTED SOURCES graph + + communityOrganizationGraphPath + /tmp/prod_provision/graph/09_graph_comunity_organization + + + + + + + Set the target path to store the FUNDING from SEMANTIC RELATION graph + + fundingGraphPath + /tmp/prod_provision/graph/10_graph_funding + + + + + + + Set the target path to store the COMMUNITY from SEMANTIC RELATION graph + + communitySemRelGraphPath + /tmp/prod_provision/graph/11_graph_comunity_sem_rel + + + + + + + Set the target path to store the COUNTRY enriched graph + + countryGraphPath + /tmp/prod_provision/graph/12_graph_country + + + + + + + Set the target path to store the CLEANED graph + + cleanedGraphPath + /tmp/prod_provision/graph/13_graph_cleaned + + + + + + + Set the target path to store the blacklisted graph + + blacklistedGraphPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the map of paths for the Bulk Tagging + + bulkTaggingPathMap + {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} + + + + + + + Set the map of associations organization, community list for the propagation of community to result through organization + + propagationOrganizationCommunityMap + {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], + "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} + + + + + + + + Set the dedup orchestrator name + + dedupConfig + dedup-similarity-result-decisiontree-v2 + + + + + + + declares the ActionSet ids to promote in the RAW graph + + actionSetIdsRawGraph + scholexplorer-dump,doiboost,orcidworks-no-doi,iis-entities-software,iis-entities-patent,datacite + + + + + + + declares the ActionSet ids to promote in the INFERRED graph + + actionSetIdsIISGraph + iis-researchinitiative,iis-document-citations,iis-document-citations-relations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-projects-main,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19,h2020classification,bipfinder-scores + + + + + + + Set the IS lookup service address + + isLookUpUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + + + + + + + + + reuse cached ODF claims from the PROD aggregation system + + reuseODFClaims + true + + + + + + + reuse cached ODF records on HDFS from the PROD aggregation system + + reuseODFhdfs + true + + + + + + + reuse cached OAF claims from the PROD aggregation system + + reuseOAFClaims + true + + + + + + + reuse cached OAF records on HDFS from the PROD aggregation system + + reuseOAFhdfs + true + + + + + + + reuse cached DB content from the PROD aggregation system + + reuseDB + true + + + + + + + reuse cached OpenOrgs content from the PROD aggregation system + + reuseDBOpenorgs + true + + + + + + + reuse cached ODF content from the PROD aggregation system + + reuseODF + true + + + + + + + reuse cached OAF content from the PROD aggregation system + + reuseOAF + true + + + + + + + set the PROD aggregator content path + + contentPath + /tmp/prod_aggregator + + + + + + + wait configurations + + + + + + + create the PROD AGGREGATOR graph + + executeOozieJob + IIS + + { + 'graphOutputPath' : 'aggregatorGraphPath', + 'isLookupUrl' : 'isLookUpUrl', + 'reuseODFClaims' : 'reuseODFClaims', + 'reuseOAFClaims' : 'reuseOAFClaims', + 'reuseDB' : 'reuseDB', + 'reuseDBOpenorgs' : 'reuseDBOpenorgs', + 'reuseODF' : 'reuseODF', + 'reuseODF_hdfs' : 'reuseODFhdfs', + 'reuseOAF' : 'reuseOAF', + 'reuseOAF_hdfs' : 'reuseOAFhdfs', + 'contentPath' : 'contentPath', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/raw_all/oozie_app', + 'mongoURL' : '', + 'mongoDb' : '', + 'mdstoreManagerUrl' : '', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '', + 'postgresOpenOrgsURL' : '', + 'postgresOpenOrgsUser' : '', + 'postgresOpenOrgsPassword' : '', + 'shouldHashId' : 'true', + 'importOpenorgs' : 'true', + 'workingDir' : '/tmp/prod_provision/working_dir/prod_aggregator' + } + + build-report + + + + + + + create the RAW graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsRawGraph', + 'inputGraphRootPath' : 'aggregatorGraphPath', + 'outputGraphRootPath' : 'rawGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', 
+ 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsRaw' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'rawGraphPath', + 'graphOutputPath': 'cleanedFirstGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + search for duplicates in the raw graph + + executeOozieJob + IIS + + { + 'actionSetId' : 'dedupConfig', + 'graphBasePath' : 'cleanedFirstGraphPath', + 'dedupGraphPath': 'dedupGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/scan/oozie_app', + 'actionSetIdOpenorgs' : 'dedup-similarity-organization-simple', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G' + } + + build-report + + + + + + + create the INFERRED graph + + executeOozieJob + IIS + + { + 'inputActionSetIds' : 'actionSetIdsIISGraph', + 'inputGraphRootPath' : 'dedupGraphPath', + 'outputGraphRootPath' : 'inferredGraphPath', + 'isLookupUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/actionmanager/wf/main/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'activePromoteDatasetActionPayload' : 'true', + 'activePromoteDatasourceActionPayload' : 'true', + 'activePromoteOrganizationActionPayload' : 'true', + 'activePromoteOtherResearchProductActionPayload' : 'true', + 'activePromoteProjectActionPayload' : 'true', + 'activePromotePublicationActionPayload' : 'true', + 'activePromoteRelationActionPayload' : 'true', + 'activePromoteResultActionPayload' : 'true', + 'activePromoteSoftwareActionPayload' : 'true', + 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', + 'workingDir' : '/tmp/prod_provision/working_dir/promoteActionsIIS' + } + + build-report + + + + + + + mark duplicates as deleted and redistribute the relationships + + executeOozieJob + IIS + + { + 'graphBasePath' : 'inferredGraphPath', + 'graphOutputPath': 'consistentGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/dedup/consistency/oozie_app', + 'workingPath' : '/tmp/prod_provision/working_dir/dedup' + } + + build-report + + + + + + + propagates ORCID among results linked by allowedsemrels semantic relationships + + executeOozieJob + IIS + + { + 'sourcePath' : 'consistentGraphPath', + 'outputPath': 'orcidGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/orcidtoresultfromsemrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/orcid', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + mark results respecting some rules as belonging to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'orcidGraphPath', + 'outputPath': 'bulkTaggingGraphPath', + 'isLookUpUrl' : 'isLookUpUrl', + 'pathMap' : 'bulkTaggingPathMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/bulktag/oozie_app', + 'workingDir' : 
'/tmp/prod_provision/working_dir/bulktag' + } + + build-report + + + + + + + creates relashionships between results and organizations when the organizations are associated to institutional repositories + + executeOozieJob + IIS + + { + 'sourcePath' : 'bulkTaggingGraphPath', + 'outputPath': 'affiliationGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/affiliation/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/affiliation', + 'saveGraph' : 'true', + 'blacklist' : 'empty' + } + + build-report + + + + + + + marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap + + executeOozieJob + IIS + + { + 'sourcePath' : 'affiliationGraphPath', + 'outputPath': 'communityOrganizationGraphPath', + 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_organization/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_organization', + 'saveGraph' : 'true' + } + + build-report + + + + + + + created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects + + executeOozieJob + IIS + + { + 'sourcePath' : 'communityOrganizationGraphPath', + 'outputPath': 'fundingGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/funding/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/funding', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities + + executeOozieJob + IIS + + { + 'sourcePath' : 'fundingGraphPath', + 'outputPath': 'communitySemRelGraphPath', + 'isLookUpUrl' : 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/community_semrel/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/community_semrel', + 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', + 'saveGraph' : 'true' + } + + build-report + + + + + + + associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from + + executeOozieJob + IIS + + { + 'sourcePath' : 'communitySemRelGraphPath', + 'outputPath': 'countryGraphPath' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/enrichment/country/oozie_app', + 'sparkExecutorCores' : '3', + 'sparkExecutorMemory' : '10G', + 'workingDir' : '/tmp/prod_provision/working_dir/country', + 'allowedtypes' : 'pubsrepository::institutional', + 'whitelist' : '10|openaire____::e783372970a1dc066ce99c673090ff88;10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0', + 'saveGraph' : 'true' + } + + build-report + + + + + + + clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid + + executeOozieJob + IIS + + { + 'graphInputPath' : 'countryGraphPath', + 'graphOutputPath': 'cleanedGraphPath', + 'isLookupUrl': 'isLookUpUrl' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/clean/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/clean' + } + + build-report + + + + + + + removes blacklisted relations + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedGraphPath', + 'outputPath': 'blacklistedGraphPath' + } + + + { + 'oozie.wf.application.path' : 
'/lib/dnet/PROD/oa/enrichment/blacklist/oozie_app', + 'workingDir' : '/tmp/prod_provision/working_dir/blacklist', + 'postgresURL' : '', + 'postgresUser' : '', + 'postgresPassword' : '' + } + + build-report + + + + + + + + wf_20210723_171026_279 + 2021-07-24T00:00:39+00:00 + SUCCESS + + + +
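Illustrative note on the community-through-organization step of the profile above: the 'organizationtoresultcommunitymap' parameter is the propagationOrganizationCommunityMap JSON defined earlier in this profile, i.e. a map from organization identifiers to the community ids their results should be tagged with. A minimal sketch of parsing and querying such a map follows; the object and method names are illustrative and do not come from the actual enrichment code.

    import com.fasterxml.jackson.databind.ObjectMapper
    import scala.collection.JavaConverters._

    // Illustrative sketch only: deserialize the organization -> communities JSON map
    // and look up the communities associated with a given organization id.
    object OrganizationCommunityMapSketch {

      def parse(json: String): Map[String, List[String]] = {
        val mapper = new ObjectMapper()
        val raw = mapper.readValue(json, classOf[java.util.Map[String, java.util.List[String]]])
        raw.asScala.map { case (orgId, communities) => (orgId, communities.asScala.toList) }.toMap
      }

      def main(args: Array[String]): Unit = {
        val json = """{"20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"]}"""
        val map = parse(json)
        // results collected from datasources of this organization would be tagged with these communities
        println(map.getOrElse("20|grid________::a867f78acdc5041b34acfe4f9a349157", Nil))
      }
    }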
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml new file mode 100644 index 000000000..836e69d6f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml @@ -0,0 +1,74 @@ + +
+ + + + + +
+ + Graph to HiveDB [PROD] + Data Provision + 30 + + + Set the path containing the graph to be imported into Hive + + inputPath + + + + + + + + Set the name of the target Hive DB + + hiveDbName + + + + + + + + wait configurations + + + + + + + import the graph into the Hive DB + + executeOozieJob + IIS + + { + 'inputPath' : 'inputPath', + 'hiveDbName' : 'hiveDbName' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/hive/oozie_app', + 'sparkDriverMemory' : '4G', + 'sparkExecutorMemory' : '10G', + 'sparkExecutorCores' : '3' + } + + build-report + + + + + + + + wf_20210728_075001_400 + 2021-07-28T08:04:00+00:00 + SUCCESS + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml new file mode 100644 index 000000000..6cdf41bb6 --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml @@ -0,0 +1,99 @@ + +
+ + + + + +
+ + Update Solr [PROD] + Data Provision + 30 + + + Set the path containing the GRAPH to index + + inputGraphRootPath + /tmp/prod_provision/graph/14_graph_blacklisted + + + + + + + Set the metadata format used for the index records + + format + DMF + + + + + + + Set the lookup address + + isLookupUrl + http://services.openaire.eu:8280/is/services/isLookUp?wsdl + + + + + + + wait configurations + + + + + + + update the SOLR index + + executeOozieJob + IIS + + { + 'inputGraphRootPath' : 'inputGraphRootPath', + 'isLookupUrl' : 'isLookupUrl', + 'format' : 'format' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/provision/oozie_app', + 'sourceMaxRelations' : '1000', + 'targetMaxRelations' : '10000000', + 'relPartitions' : '3000', + 'batchSize' : '2000', + 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments,cites,isCitedBy', + 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', + 'resumeFrom' : 'prepare_relations', + 'shouldIndex' : 'true', + 'outputFormat' : 'SOLR', + 'sparkDriverMemoryForJoining' : '3G', + 'sparkExecutorMemoryForJoining' : '7G', + 'sparkExecutorCoresForJoining' : '4', + 'sparkDriverMemoryForIndexing' : '2G', + 'sparkExecutorMemoryForIndexing' : '2G', + 'sparkExecutorCoresForIndexing' : '64', + 'sparkNetworkTimeout' : '600', + 'workingDir' : '/tmp/prod_provision/working_dir/update_solr' + } + + build-report + + + + + + + + wf_20210724_062705_620 + 2021-07-25T13:25:37+00:00 + SUCCESS + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml new file mode 100644 index 000000000..4dfae3c7d --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml @@ -0,0 +1,100 @@ + +
+ + + + + +
+ + Update Stats [PROD] + Data Provision + 30 + + + Set the OpenAIRE graph DB name + + openaire_db_name + openaire_prod_yyyyMMdd + + + + + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + update the content in the stats DB + + executeOozieJob + IIS + + { + 'openaire_db_name' : 'openaire_db_name', + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_update/oozie_app', + 'hive_timeout' : '15000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_shadow_name' : 'openaire_prod_stats_shadow', + 'external_stats_db_name' : 'stats_ext', + 'monitor_db_shadow_name' : 'openaire_prod_stats_monitor_shadow', + 'observatory_db_shadow_name' : 'openaire_prod_stats_observatory_shadow', + 'context_api_url' : 'https://services.openaire.eu/openaire' + } + + build-report + + + + + + + + wf_20210725_065608_71 + 2021-07-26T07:35:55+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml new file mode 100644 index 000000000..d8def071f --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml @@ -0,0 +1,87 @@ + +
+ + + + + +
+ + Publish Stats [PROD] + Content Publishing + 35 + + + Set the STATS DB name + + stats_db_name + openaire_prod_stats_yyyyMMdd + + + + + + + Set the STATS MONITOR DB name + + monitor_db_name + openaire_prod_stats_monitor_yyyyMMdd + + + + + + + Set the STATS OBSERVATORY DB name + + observatory_db_name + openaire_prod_stats_observatory_yyyyMMdd + + + + + + + wait configurations + + + + + + + publishes the stats DB to the public schema + + executeOozieJob + IIS + + { + 'stats_db_name' : 'stats_db_name', + 'monitor_db_name' : 'monitor_db_name', + 'observatory_db_name' : 'observatory_db_name' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/graph/stats_promote/oozie_app', + 'hive_timeout' : '150000', + 'stats_tool_api_url' : 'https://services.openaire.eu/stats-tool', + 'stats_db_production_name' : 'openaire_prod_stats', + 'monitor_db_production_name' : 'openaire_prod_stats_monitor', + 'observatory_db_production_name' : 'openaire_prod_stats_observatory' + } + + build-report + + + + + + + + wf_20210727_160728_625 + 2021-07-27T16:53:01+00:00 + SUCCESS + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml new file mode 100644 index 000000000..cf337fd7e --- /dev/null +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml @@ -0,0 +1,131 @@ + +
+ + + + + +
+ + Update Broker events [PROD OCEAN] + Data Provision + 30 + + + Set the path containing the GRAPH to scan + + graphInputPath + + + + + + + + Set the datasource Ids Whitelist + + datasourceIdWhitelist + openaire____::9ecafa3655143cbc4bc75853035cd432,opendoar____::dc6e224a8d74ce03bf301152d6e33e97,openaire____::09da65eaaa6deac2f785df1e0ae95a06,openaire____::3db634fc5446f389d0b826ea400a5da6,openaire____::5a38cb462ac487bf26bdb86009fe3e74,openaire____::3c29379cc184f66861e858bc7aa9615b,openaire____::4657147e48a1f32637bfe3743bce76c6,openaire____::c3267ea1c3f378c456209b6df241624e,opendoar____::358aee4cc897452c00244351e4d91f69,re3data_____::7b0ad08687b2c960d5aeef06f811d5e6,opendoar____::798ed7d4ee7138d49b8828958048130a,opendoar____::6f4922f45568161a8cdf4ad2299f6d23,opendoar____::4aa0e93b918848be0b7728b4b1568d8a,openaire____::02b55e4f52388520bfe11f959f836e68 + + + + + + + Set the datasource type Whitelist + + datasourceTypeWhitelist + pubsrepository::unknown,pubsrepository::institutional,pubsrepository::thematic,datarepository::unknown,orprepository,softwarerepository + + + + + + + Set the datasource Id Blacklist + + datasourceIdBlacklist + - + + + + + + + Set the TOPIC whitelist (* = all topics) + + topicWhitelist + ENRICH/MISSING/SUBJECT/DDC,ENRICH/MISSING/SUBJECT/JEL,ENRICH/MISSING/SUBJECT/MESHEUROPMC,ENRICH/MISSING/PUBLICATION_DATE,ENRICH/MISSING/PID,ENRICH/MISSING/PROJECT,ENRICH/MISSING/SUBJECT/ACM,ENRICH/MISSING/SUBJECT/ARXIV,ENRICH/MISSING/OPENACCESS_VERSION,ENRICH/MISSING/AUTHOR/ORCID,ENRICH/MISSING/ABSTRACT,ENRICH/MORE/SUBJECT/ACM,ENRICH/MORE/SUBJECT/ARXIV,ENRICH/MORE/SUBJECT/DDC,ENRICH/MORE/SUBJECT/JEL,ENRICH/MORE/OPENACCESS_VERSION,ENRICH/MORE/SUBJECT/MESHEUROPMC,ENRICH/MORE/PID + + + + + + + Set the output path to store the Event records + + outputDir + /var/lib/dnet/broker_PROD/events + + + + + + + wait configurations + + + + + + + update the BROKER events + + executeOozieJob + IIS + + { + 'graphInputPath' : 'graphInputPath', + 'datasourceIdWhitelist' : 'datasourceIdWhitelist', + 'datasourceTypeWhitelist' : 'datasourceTypeWhitelist', + 'datasourceIdBlacklist' : 'datasourceIdBlacklist', + 'topicWhitelist' : 'topicWhitelist', + 'outputDir' : 'outputDir' + } + + + { + 'oozie.wf.application.path' : '/lib/dnet/PROD/oa/broker/generate_events/oozie_app', + 'esEventIndexName' : '', + 'esNotificationsIndexName' : '', + 'esIndexHost' : '', + 'maxIndexedEventsForDsAndTopic' : '100', + 'esBatchWriteRetryCount' : '8', + 'esBatchWriteRetryWait' : '60s', + 'esBatchSizeEntries' : '200', + 'esNodesWanOnly' : 'true', + 'brokerApiBaseUrl' : '', + 'brokerDbUrl' : '', + 'brokerDbUser' : '', + 'brokerDbPassword' : '', + 'sparkDriverMemory' : '3G', + 'sparkExecutorMemory' : '7G', + 'sparkExecutorCores' : '6', + 'workingDir' : '/tmp/prod_provision/working_dir/broker_events' + } + + build-report + + + + + + + + wf_20210709_073839_206 + 2021-07-09T11:01:01+00:00 + FAILURE + + + +
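Illustrative note on the broker parameters above: the datasource and topic lists are passed as comma-separated strings, with '-' apparently standing for an empty blacklist and '*' for "all topics". The sketch below shows one way such parameters could be turned into lookup sets and applied when deciding whether to keep a candidate event; the names are hypothetical and this is not the actual generate_events logic.

    // Illustrative sketch only: parse the comma-separated whitelist/blacklist parameters
    // of the broker profile and decide whether an event for a datasource/topic is kept.
    object BrokerWhitelistSketch {

      // "-" is treated here as an empty list, mirroring how the profile seems to encode "no blacklist"
      def toSet(param: String): Set[String] =
        param.split(",").map(_.trim).filter(s => s.nonEmpty && s != "-").toSet

      def keepEvent(datasourceId: String, datasourceType: String, topic: String,
                    dsIdWhitelist: Set[String], dsTypeWhitelist: Set[String],
                    dsIdBlacklist: Set[String], topicWhitelist: Set[String]): Boolean = {
        val datasourceAllowed =
          (dsIdWhitelist.isEmpty || dsIdWhitelist.contains(datasourceId)) &&
            (dsTypeWhitelist.isEmpty || dsTypeWhitelist.contains(datasourceType)) &&
            !dsIdBlacklist.contains(datasourceId)
        // "*" in the topic whitelist means "all topics", per the parameter description above
        val topicAllowed = topicWhitelist.contains("*") || topicWhitelist.contains(topic)
        datasourceAllowed && topicAllowed
      }

      def main(args: Array[String]): Unit = {
        val dsTypes = toSet("pubsrepository::unknown,pubsrepository::institutional,orprepository")
        println(keepEvent("opendoar____::xyz", "pubsrepository::institutional", "ENRICH/MISSING/PID",
          Set.empty, dsTypes, toSet("-"), Set("ENRICH/MISSING/PID"))) // true
      }
    }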
\ No newline at end of file From 2efa5abda5e4cbebcc4061c0f3866fe431d3163e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Aug 2021 12:28:36 +0200 Subject: [PATCH 044/166] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 30e500da3..71b20b356 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -123,8 +123,6 @@
- - From 9731a6144a4ed9df6a513b81cc5c390b9328971b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Tue, 10 Aug 2021 17:49:45 +0200 Subject: [PATCH 045/166] hostedbymap - in case the journal is open access the access may be changed also for the best access right in the result --- .../dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 312513285..4c3b98f3b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -34,6 +34,7 @@ object SparkApplyHostedByMapToResult { if (ei.getOpenaccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } } From b688567db5238276a7826b7f3e3ccd9962f44f53 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 10:12:10 +0200 Subject: [PATCH 046/166] hostedbymap - modified part of test to check the bestaccessright changed --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala | 8 ++++++-- .../eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index c48c3b35d..a48e52702 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -55,13 +55,17 @@ class TestApply extends java.io.Serializable{ assertTrue(pa.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) assertTrue(pa.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) assertTrue(pa.getInstance().get(0).getAccessright.getOpenAccessRoute.equals(OpenAccessRoute.hybrid)) + assertTrue(pa.getBestaccessright.getClassid.equals("OPEN")) + assertTrue(pa.getBestaccessright.getClassname.equals("Open Access")) assertTrue(pb.getInstance().get(0).getHostedby.getKey.equals("10|openaire____::0b74b6a356bbf23c245f9ae9a748745c")) assertTrue(pb.getInstance().get(0).getHostedby.getValue.equals("Revistas de investigación Universidad Nacional Mayor de San Marcos")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("Open Access")) - assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("OPEN")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassname.equals("not available")) + assertTrue(pb.getInstance().get(0).getAccessright.getClassid.equals("UNKNOWN")) assertTrue(pb.getInstance().get(0).getAccessright.getOpenAccessRoute == null) + assertTrue(pb.getBestaccessright.getClassid.equals("UNKNOWN")) + assertTrue(pb.getBestaccessright.getClassname.equals("not available")) }else{ 
assertTrue(pa.getInstance().get(0).getHostedby.getKey.equals(pb.getInstance().get(0).getHostedby.getKey)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json index 602bedbda..8f6842bf3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/publication.json @@ -7,7 +7,7 @@ {"author":[{"fullname":"López-Lambas, M.","name":"M.","pid":[],"rank":1,"surname":"López-Lambas"},{"fullname":"López-Suárez, E.","name":"E.","pid":[],"rank":2,"surname":"López-Suárez"},{"fullname":"La Paix-Puello, L.","name":"L.","pid":[],"rank":3,"surname":"La Paix-Puello"},{"fullname":"Binsted, A.","name":"A.","pid":[],"rank":4,"surname":"Binsted"},{"fullname":"Tuominen, Anu","name":"Anu","pid":[],"rank":5,"surname":"Tuominen"},{"fullname":"Järvi, Tuuli","name":"Tuuli","pid":[],"rank":6,"surname":"Järvi"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"dateofcollection":"2021-07-10T12:26:56.401Z","dateoftransformation":"2021-07-10T14:03:19.178Z","description":[],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2009-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information 
System"},"instancetype":{"classid":"0002","classname":"Book","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813924212,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2019-07-31T04:35:29Z","harvestDate":"2021-07-10T12:26:56.401Z","identifier":"oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/d7d0af5a-3c5b-4ca5-91be-250f96153e15","50|355e65625b88::899c4bc19c11e4e4ebe8d49a8c51c5f4"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"European Commission EC"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"López-Lambas , M , López-Suárez , E , La Paix-Puello , L , Binsted , A , Tuominen , A & Järvi , T 2009 , Sustainable Development methodology development and application results : Deliverable 4.1 . 
European Commission EC ."}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Sustainable Development methodology development and application results:Deliverable 4.1"}]} {"author":[{"fullname":"Vänskä, L.","name":"L.","pid":[],"rank":1,"surname":"Vänskä"},{"fullname":"Rosenberg, Rolf","name":"Rolf","pid":[],"rank":2,"surname":"Rosenberg"},{"fullname":"Pitkänen, V.","name":"V.","pid":[],"rank":3,"surname":"Pitkänen"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"dateofcollection":"2021-07-10T12:29:21.556Z","dateoftransformation":"2021-07-10T15:08:41.325Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"

An automatic gamma spectrometer for activation analysis has been developed at the Technical Research Centre of Finland. The on-line system comprises a sample changer for up to 120 samples, detector, multichannel analyzer, microcomputer programmed with Basic language and input/output devices.

"}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"doi","classname":"Digital Object Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"10.1016/0167-5087(83)90428-3"}],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1983-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813653066,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-01-01T03:06:44Z","harvestDate":"2021-07-10T12:29:21.556Z","identifier":"oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|355e65625b88::e8f88044b1a95057152f5827e3f373a4","oai:cris.vtt.fi:publications/72c4b4d2-ee78-4477-8b2b-3f9a70ec1de3"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Vänskä , L , Rosenberg , R & Pitkänen , V 1983 , ' An automatic gamma spectrometer for activation analysis ' , Nuclear Instruments and Methods In Physics Research , vol. 213 , no. 2-3 , pp. 343 - 347 . 
https://doi.org/10.1016/0167-5087(83)90428-3"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"An automatic gamma spectrometer for activation analysis"}]} {"author":[{"fullname":"Silla, Anne","name":"Anne","pid":[],"rank":1,"surname":"Silla"}],"bestaccessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"dateofcollection":"2021-07-10T12:36:22.169Z","dateoftransformation":"2021-07-10T15:19:31.29Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This paper presents an assessment framework with 13 criteria to systematically evaluate measures for improving the safety of level crossings (LCs). The criteria were first applied in the Finnish context, where eight safety measures were estimated to reduce LC accidents by more than 20%. Next, the estimates from the Finnish study were used as a starting point for evaluating innovative and cost-effective LC safety measures piloted during an EU project, SAFER-LC. One such measure was estimated to potentially reduce LC accidents by more than 20%. The proposed assessment framework is a good way to assess and categorise LC safety measures. 
The summary of the assessment criteria is further intended for decision-makers looking to implement effective measures in a specific situation or at a particular LC."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|355e65625b88::f7d973bc9fc15080fa9491d997568847","instance":[{"accessright":{"classid":"RESTRICTED","classname":"Restricted","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2021-01-01"},"distributionlocation":"","hostedby":{"key":"10|openaire____::4692342f0992d91f9e705c26959f09e0","value":"VTT Research Information System"},"instancetype":{"classid":"0005","classname":"Contribution for newspaper or weekly magazine","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://cris.vtt.fi/en/publications/19966012-bcd9-4427-8136-a60e91b152f9"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813676961,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fcris.vtt.fi%2Fws%2Foai","datestamp":"2021-07-02T11:56:26Z","harvestDate":"2021-07-10T12:36:22.169Z","identifier":"oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["oai:cris.vtt.fi:publications/19966012-bcd9-4427-8136-a60e91b152f9","50|355e65625b88::f7d973bc9fc15080fa9491d997568847"],"pid":[],"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Silla , A 2021 , Evaluation of Level crossing Safety Measures : Applicability of the Framework to Innovative and Low-cost Measures . in Transportation Research Board : The TRIS and ITRD database . 100th Annual Meeting of the Transportation Research Board (TRB) , Washington DC , United States , 5/01/21 . 
< https://trid.trb.org/view/1759287 >"}],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Evaluation of Level crossing Safety Measures:Applicability of the Framework to Innovative and Low-cost Measures"}]} -{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez García"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. 
Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; 
Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 
25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} +{"author":[{"fullname":"Barrientos Jiménez, Elsa Julia","name":"Elsa Julia","pid":[],"rank":1,"surname":"Barrientos Jiménez"},{"fullname":"Vildoso Villegas, Jesahel","name":"Jesahel","pid":[],"rank":2,"surname":"Vildoso Villegas"},{"fullname":"Sánchez García, Tula Carola","name":"Tula Carola","pid":[],"rank":3,"surname":"Sánchez 
García"}],"bestaccessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"dateofcollection":"2021-03-12T07:05:02.842Z","dateoftransformation":"2021-03-12T07:11:33.642Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"This research consists of a diagnosis of the post graduate unit of UNMSM Faculty Education; it counts with the participation of 358 students of Second Specialty, Master and Doctorate programs. To carry out this diagnosis the instrument of the Iberoamerican University Association was taken into account. The following variables were used: students, teachers, study plans, research, management, surroundings, graduates and impact and evaluation. According to the established measurement the post graduate unit has obtained 69,27 points, which places it on a good level. This level considers a range point from 60 through 74."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"La investigación realiza un diagnóstico de la Unidad de Post Grado de la Facultad de Educación de la UNMSM, con la participación de 358 estudiantes de Segunda Especialidad, Maestrías y Doctorado. Para la realización del diagnóstico se consideró el instrumento de la Asociación Universitaria Iberoamericana de Post Grado, que toma en cuenta las siguientes variables: estudiantes, profesores, plan de estudios, investigación, gestión, entorno, egresados e impacto y evaluación. 
Realizado el diagnóstico de acuerdo a la medición establecida la UPG de Educación de acuerdo al puntaje obtenido de 69, 27 se ubica en el nivel bueno, ya que dicho nivel considera las puntuaciones comprendidas entre 60 y 74."}],"externalReference":[],"extraInfo":[],"format":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"application/pdf"}],"fulltext":[],"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","instance":[{"accessright":{"classid":"UNKNOWN","classname":"not available","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-06-30"},"distributionlocation":"","hostedby":{"key":"10|openaire____::0b74b6a356bbf23c245f9ae9a748745c","value":"Revistas de investigación Universidad Nacional Mayor de San Marcos"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"license":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"http://creativecommons.org/licenses/by-nc-sa/4.0"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://revistasinvestigacion.unmsm.edu.pe/index.php/educa/article/view/4754"]}],"journal":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"edition":"","ep":"","iss":"","issnLinking":"","issnOnline":"0001-396X","issnPrinted":"1728-5852","name":"Investigación Educativa","sp":"","vol":""},"language":{"classid":"spa","classname":"Spanish; Castilian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627812364983,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Frevistasinvestigacion.unmsm.edu.pe%2Findex.php%2Findex%2Foai","datestamp":"2021-03-06T03:56:00Z","harvestDate":"2021-03-12T07:05:02.842Z","identifier":"oai:ojs.csi.unmsm:article/4754","metadataNamespace":"http://www.openarchives.org/OAI/2.0/oai_dc/"}},"originalId":["50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","oai:ojs.csi.unmsm:article/4754"],"pid":[],"publisher":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Facultad de Educación, Universidad Nacional Mayor de San 
Marcos"},"relevantdate":[],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol 14 No 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Investigación Educativa; Vol. 14 Núm. 25 (2010); 29 - 46"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1728-5852"}],"subject":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnostic"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluation."},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"Diagnóstico"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"keyword","classname":"keyword","schemeid":"dnet:subject_classification_typologies","schemename":"dnet:subject_classification_typologies"},"value":"evaluación."}],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNOSTIC OF THE POST GRADUATE UNIT OF UNMSM EDUCATION FACULTY"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"DIAGNÓSTICO DE LA UNIDAD DE POST GRADO DE LA FACULTAD DE EDUCACIÓN DE LA UNMSM"}]} {"author":[{"affiliation":[],"fullname":"Jež Rogelj, Mateja","name":"Mateja","pid":[],"rank":1,"surname":"Jež Rogelj"},{"affiliation":[],"fullname":"Mikuš, Ornella","name":"Ornella","pid":[],"rank":2,"surname":"Mikuš"},{"affiliation":[],"fullname":"Grgić, Ivo","name":"Ivo","pid":[],"rank":3,"surname":"Grgić"},{"affiliation":[],"fullname":"Zrakić, Magdalena","name":"Magdalena","pid":[],"rank":4,"surname":"Zrakić"},{"affiliation":[],"fullname":"Hadelan, Lari","name":"Lari","pid":[],"rank":5,"surname":"Hadelan"}],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"dateofcollection":"2021-07-11T01:25:30.597Z","dateoftransformation":"2021-07-11T02:00:20.813Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Koncept održivog razvoja ima tri komponente: ekološku, ekonomsku i društvenu. U ovom je radu odabirom pet ekoloških indikatora obuhvaćena ekološka komponenta. Indikatori su birani s obzirom na učestalost njihova predlaganja i korištenja u dokumentima Europske unije (EU) i znanstvenim radovima. Cilj rada je vrednovati ekološke indikatore uz argumentiranje njihove važnosti za postizanje održivog ruralnog razvoja provođenjem ankete među ekspertima i različitim dionicima ruralnog razvoja. U anketi je sudjelovalo 47 ispitanika. 
Od predloženih je indikatora najvišu prosječnu ocjenu dobio indikator zastupljenost ekološke poljoprivrede u ukupnoj poljoprivredi (4, 15), a slijede ga biološka raznolikost biljnih i životinjskih vrsta (4, 09), upotreba pesticida/ha (4, 06), broj uvjetnih grla/ha korištenog zemljišta (4, 00) i upotreba mineralnih gnojiva/ha (3, 91)."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3","instance":[{"accessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2017-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/877152"]}],"language":{"classid":"hrv","classname":"Croatian","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813176553,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2017-05-25T04:42:10Z","harvestDate":"2021-07-11T01:25:30.597Z","identifier":"877152","metadataNamespace":""}},"originalId":["877152","50|57a035e5b1ae::007d183f5b5b4466cf987bcd50e0c6e3"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2017-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Prijedlog ekoloških indikatora za mjerenje održivog ruralnog razvoja"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main 
title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Proposal of the environmental indicators for measuring sustainable rural development"}]} {"author":[{"affiliation":[],"fullname":"Petric, Bartul","name":"Bartul","pid":[],"rank":1,"surname":"Petric"},{"affiliation":[],"fullname":"Petric, Nedjeljka","name":"Nedjeljka","pid":[],"rank":2,"surname":"Petric"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Oliver Le Faucheux"}],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"dateofcollection":"2021-07-11T00:42:48.748Z","dateoftransformation":"2021-07-11T02:01:00.178Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"In the present paper it was studied the waste sludge separation and its use, with the purpose to prevent the sea water pollution."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"1977-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/314877"]}],"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813187318,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2007-12-18T09:21:18Z","harvestDate":"2021-07-11T00:42:48.748Z","identifier":"314877","metadataNamespace":""}},"originalId":["314877","50|57a035e5b1ae::012b5c63f06424e2dc1c82ac6a7554d2"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"1977-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"Treatment of Waste Sea Water in Calcium Carbide Industry"}]} {"author":[{"affiliation":[],"fullname":"Erstić, Marijana","name":"Marijana","pid":[],"rank":1,"surname":"Erstić"}],"bestaccessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"collectedfrom":[{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"}],"context":[],"contributor":[],"country":[],"coverage":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"dateofcollection":"2021-07-11T01:23:38.842Z","dateoftransformation":"2021-07-11T02:01:53.063Z","description":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"Die Filme Michael Hanekes scheinen vor allem mit jenen des späten Pasolini zu korrespondieren, allen voran mit SALÒ aber auch mit TEOREMA, jenem Film, in dem sich Pasolini dem 
italienischen Großbürgertum der ausgehenden 1960er Jahre widmet. Erzählt wird in TEOREMA die Geschichte einer zu Beginn des Films anscheinend intakten Familie, die sich durch die Begegnung mit einem Unbekannten der eigenen Leere bewusst wird und auseinanderfällt. Der Film endet mit dem Schrei eines der Protagonisten, der hier jedoch weniger ein neues Leben bedeutet, als vielmehr eine Reaktion auf die repressiven und normativen Mechanismen der Gesellschaft. Hanekes Filme LEMMINGE, TEIL II und DER SIEBENTE KONTINENT gehen jeweils unter-schiedlich von einer vergleichbaren Prämisse einer leeren Familie aus. Doch während die Schreie in den LEMMINGEN immer lauter werden, verstummen sie im SIEBENTEN KONTINENT schließlich. In allen benannten Filmen hat das Werk Francis Bacons eine besondere Rolle gespielt und wurde entweder explizit (TEOREMA, LEMMINGE II) oder implizit (DER SIEBENTE KONTINENT) thematisiert. Der Vortrag geht den Bildern des (stummen) Schreis in den genannten Filmen nach."}],"externalReference":[],"extraInfo":[],"format":[],"fulltext":[],"id":"50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97","instance":[{"accessright":{"classid":"CLOSED","classname":"Closed Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"alternateIdentifier":[],"collectedfrom":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"dateofacceptance":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"value":"2014-01-01"},"hostedby":{"key":"10|openaire____::345c9d171ef3c5d706d08041d506428c","value":"Croatian Scientific Bibliography - CROSBI"},"instancetype":{"classid":"0004","classname":"Conference 
object","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"pid":[],"refereed":{"classid":"0000","classname":"Unknown","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"url":["https://www.bib.irb.hr/848053"]}],"language":{"classid":"deu/ger","classname":"German","schemeid":"dnet:languages","schemename":"dnet:languages"},"lastupdatetimestamp":1627813203778,"oaiprovenance":{"originDescription":{"altered":true,"baseURL":"https%3A%2F%2Fbib.irb.hr%2Foai2%2F","datestamp":"2016-12-06T18:47:39Z","harvestDate":"2021-07-11T01:23:38.842Z","identifier":"848053","metadataNamespace":""}},"originalId":["848053","50|57a035e5b1ae::023bc0883fb1075fe0a86e829b6c5d97"],"pid":[],"relevantdate":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"issued","classname":"issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"value":"2014-01-01"}],"resourcetype":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"source":[],"subject":[],"title":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" von Pier Paolo Pasolini oder: Ein Schrei auf Francis Bacons Spuren"},{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"value":"\"Teorema\" by Pier Paolo Pasolini"}]} \ No newline at end of file From 2ee21da43b8a93b8db9f75528e8c4e268e1b6187 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:13:22 +0200 Subject: [PATCH 047/166] suggestions from SonarLint --- .../GenerateOoziePropertiesMojo.java | 12 +- .../WritePredefinedProjectProperties.java | 11 +- .../GenerateOoziePropertiesMojoTest.java | 16 +- .../WritePredefinedProjectPropertiesTest.java | 32 ++-- .../dhp/application/ApplicationUtils.java | 14 -- .../ArgumentApplicationParser.java | 8 +- .../dhp/application/OptionsParameter.java | 3 - .../dnetlib/dhp/collection/ApiDescriptor.java | 2 +- .../java/eu/dnetlib/dhp/common/Constants.java | 3 + .../dnetlib/dhp/common/GraphResultMapper.java | 17 +- .../eu/dnetlib/dhp/common/MakeTarArchive.java | 23 +-- .../eu/dnetlib/dhp/common/MdstoreClient.java | 9 +- .../eu/dnetlib/dhp/common/PacePerson.java | 1 - .../dhp/common/api/ZenodoAPIClient.java | 27 +-- .../dhp/common/api/zenodo/Creator.java | 6 +- .../dnetlib/dhp/common/api/zenodo/File.java | 14 -- .../dhp/common/rest/DNetRestClient.java | 37 ++-- .../dhp/common/vocabulary/Vocabulary.java | 3 +- 
.../common/vocabulary/VocabularyGroup.java | 13 +- .../java/eu/dnetlib/dhp/message/Message.java | 5 +- .../eu/dnetlib/dhp/oa/merge/AuthorMerger.java | 17 +- .../dhp/parser/utility/VtdUtilityParser.java | 3 + .../oaf/utils/GraphCleaningFunctions.java | 2 +- .../dhp/schema/oaf/utils/OafMapperUtils.java | 17 +- .../java/eu/dnetlib/dhp/utils/DHPUtils.java | 32 +--- .../dhp/utils/ISLookupClientFactory.java | 15 +- .../saxon/AbstractExtensionFunction.java | 2 +- .../dnetlib/dhp/utils/saxon/ExtractYear.java | 5 +- .../dhp/utils/saxon/NormalizeDate.java | 2 +- .../eu/dnetlib/dhp/utils/saxon/PickFirst.java | 5 +- .../utils/saxon/SaxonTransformerFactory.java | 3 + .../ArgumentApplicationParserTest.java | 4 +- .../dnetlib/dhp/common/HdfsSupportTest.java | 10 +- .../eu/dnetlib/dhp/common/PacePersonTest.java | 6 +- .../dhp/common/SparkSessionSupportTest.java | 6 +- .../dhp/common/api/ZenodoAPIClientTest.java | 10 +- .../dhp/oa/merge/AuthorMergerTest.java | 4 +- .../schema/oaf/utils/OafMapperUtilsTest.java | 36 ++-- .../relation/RelationMapperTest.java | 4 +- .../dnetlib/dhp/actionmanager/ISClient.java | 29 ++- .../actionmanager/promote/MergeAndGet.java | 1 + .../PromoteActionPayloadForGraphTableJob.java | 4 +- ...rtitionActionSetsByPayloadTypeJobTest.java | 2 +- .../promote/MergeAndGetTest.java | 32 ++-- ...moteActionPayloadForGraphTableJobTest.java | 4 +- .../PromoteActionPayloadFunctionsTest.java | 8 +- .../bipfinder/CollectAndSave.java | 12 +- .../bipfinder/SparkAtomicActionScoreJob.java | 6 +- .../project/PrepareProgramme.java | 19 +- .../project/PrepareProjects.java | 2 +- .../project/ReadProjectsFromDB.java | 21 +-- .../project/SparkAtomicActionJob.java | 21 +-- .../project/utils/CSVParser.java | 6 +- .../project/utils/EXCELParser.java | 82 ++++----- .../actionmanager/project/utils/ReadCSV.java | 21 ++- .../project/utils/ReadExcel.java | 30 ++-- .../ror/GenerateRorActionSetJob.java | 7 +- .../dhp/actionmanager/ror/model/Address.java | 4 +- .../dhp/actionmanager/ror/model/Country.java | 4 +- .../ror/model/ExternalIdType.java | 2 +- .../ror/model/ExternalIdTypeDeserializer.java | 3 +- .../ror/model/GeonamesAdmin.java | 2 +- .../actionmanager/ror/model/GeonamesCity.java | 2 +- .../dhp/actionmanager/ror/model/Label.java | 2 +- .../dhp/actionmanager/ror/model/License.java | 2 +- .../actionmanager/ror/model/NameAndCode.java | 4 +- .../actionmanager/ror/model/Relationship.java | 4 +- .../ror/model/RorOrganization.java | 4 +- .../aggregation/common/AggregatorReport.java | 6 +- .../dhp/aggregation/common/ReportingJob.java | 2 +- .../mdstore/MDStoreActionNode.java | 6 +- .../dhp/collection/CollectorWorker.java | 2 +- .../GenerateNativeStoreSparkJob.java | 1 + .../dhp/collection/HttpConnector2.java | 18 +- .../mongodb/MDStoreCollectorPlugin.java | 2 - .../mongodb/MongoDbDumpCollectorPlugin.java | 2 +- .../collection/plugin/oai/OaiIterator.java | 22 +-- .../collection/plugin/rest/RestIterator.java | 26 +-- .../transformation/TransformSparkJobNode.java | 10 +- .../transformation/TransformationFactory.java | 17 +- .../dhp/transformation/xslt/DateCleaner.java | 5 - .../transformation/xslt/PersonCleaner.java | 26 --- .../xslt/XSLTTransformationFunction.java | 10 +- .../transformation/xslt/utils/Capitalize.java | 2 - .../SparkAtomicActionScoreJobTest.java | 30 ++-- .../actionmanager/project/CSVParserTest.java | 4 +- .../project/EXCELParserTest.java | 2 +- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../project/PrepareProjectTest.java | 2 +- .../project/SparkUpdateProjectTest.java | 2 +- 
.../ror/GenerateRorActionSetJobTest.java | 10 +- .../GenerateNativeStoreSparkJobTest.java | 12 +- .../plugin/rest/RestCollectorPluginTest.java | 4 +- .../CollectorWorkerApplicationTests.java | 6 +- .../transformation/TransformationJobTest.java | 16 +- .../dhp/blacklist/ReadBlacklistFromDB.java | 23 +-- .../SparkRemoveBlacklistedRelationJob.java | 6 +- .../dnetlib/dhp/blacklist/BlackListTest.java | 6 +- .../dhp/broker/model/EventFactory.java | 19 +- .../dhp/broker/oa/CheckDuplictedIdsJob.java | 11 +- .../dhp/broker/oa/IndexEventSubsetJob.java | 6 +- .../dhp/broker/oa/IndexNotificationsJob.java | 29 ++- .../dnetlib/dhp/broker/oa/IndexOnESJob.java | 3 +- .../broker/oa/PartitionEventsByDsIdJob.java | 13 +- .../oa/PrepareRelatedDatasourcesJob.java | 2 +- .../dhp/broker/oa/matchers/UpdateMatcher.java | 2 +- .../AbstractEnrichMissingDataset.java | 4 +- .../relatedProjects/EnrichMissingProject.java | 2 +- .../relatedProjects/EnrichMoreProject.java | 4 +- .../AbstractEnrichMissingPublication.java | 4 +- .../EnrichMissingSoftware.java | 2 +- .../relatedSoftware/EnrichMoreSoftware.java | 2 +- .../simple/EnrichMissingAuthorOrcid.java | 2 +- .../oa/matchers/simple/EnrichMissingPid.java | 2 +- .../matchers/simple/EnrichMissingSubject.java | 4 +- .../matchers/simple/EnrichMoreOpenAccess.java | 2 +- .../oa/matchers/simple/EnrichMorePid.java | 4 +- .../oa/matchers/simple/EnrichMoreSubject.java | 4 +- .../dhp/broker/oa/util/BrokerConstants.java | 3 + .../dhp/broker/oa/util/ClusterUtils.java | 3 + .../dhp/broker/oa/util/ConversionUtils.java | 26 +-- .../util/DatasourceRelationsAccumulator.java | 8 +- .../dhp/broker/oa/util/EventFinder.java | 5 +- .../dhp/broker/oa/util/SubscriptionUtils.java | 3 + .../dhp/broker/oa/util/TrustUtils.java | 2 + .../dhp/broker/oa/util/UpdateInfo.java | 4 +- .../broker/oa/matchers/UpdateMatcherTest.java | 5 +- .../EnrichMissingPublicationDateTest.java | 3 +- .../dhp/broker/oa/util/TrustUtilsTest.java | 26 +-- .../dhp/oa/dedup/AbstractSparkAction.java | 20 ++- .../eu/dnetlib/dhp/oa/dedup/DatePicker.java | 5 +- .../dhp/oa/dedup/DedupRecordFactory.java | 12 +- .../eu/dnetlib/dhp/oa/dedup/DedupUtility.java | 10 +- .../oa/dedup/DispatchEntitiesSparkJob.java | 9 +- .../dhp/oa/dedup/GroupEntitiesSparkJob.java | 6 +- .../eu/dnetlib/dhp/oa/dedup/IdGenerator.java | 4 +- .../dnetlib/dhp/oa/dedup/SparkBlockStats.java | 4 +- .../oa/dedup/SparkCopyOpenorgsSimRels.java | 5 +- .../dedup/SparkCopyRelationsNoOpenorgs.java | 21 +-- .../dhp/oa/dedup/SparkCreateDedupRecord.java | 3 +- .../dhp/oa/dedup/SparkCreateMergeRels.java | 14 +- .../oa/dedup/SparkCreateOrgsDedupRecord.java | 3 - .../dhp/oa/dedup/SparkCreateSimRels.java | 9 +- .../dhp/oa/dedup/SparkPrepareNewOrgs.java | 13 +- .../dhp/oa/dedup/SparkPrepareOrgRels.java | 24 +-- .../dhp/oa/dedup/SparkPropagateRelation.java | 6 +- .../dhp/oa/dedup/SparkUpdateEntity.java | 125 +++++++------ .../oa/dedup/graph/ConnectedComponent.java | 14 -- .../dhp/oa/dedup/model/Identifier.java | 8 +- .../dnetlib/dhp/oa/dedup/DatePickerTest.java | 8 +- .../dhp/oa/dedup/EntityMergerTest.java | 14 +- .../dnetlib/dhp/oa/dedup/IdGeneratorTest.java | 6 +- .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 15 +- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 20 +-- .../oa/dedup/SparkOpenorgsProvisionTest.java | 16 +- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 2 +- .../dhp/oa/dedup/jpath/JsonPathTest.java | 11 +- .../doiboost/crossref/CrossrefImporter.java | 9 +- .../dnetlib/doiboost/crossref/ESClient.java | 7 +- .../orcid/ActivitiesDecompressor.java | 25 +-- 
.../orcid/ExtractXMLActivitiesData.java | 7 +- .../orcid/ExtractXMLSummariesData.java | 10 +- .../orcid/OrcidAuthorsDOIsDataGen.java | 3 +- .../doiboost/orcid/OrcidDSManager.java | 3 +- .../orcid/SparkDownloadOrcidAuthors.java | 60 ++++--- .../orcid/SparkDownloadOrcidWorks.java | 46 ++--- .../orcid/SparkGenLastModifiedSeq.java | 24 ++- .../orcid/SparkGenerateDoiAuthorList.java | 20 +-- .../orcid/SparkUpdateOrcidAuthors.java | 35 ++-- .../orcid/SparkUpdateOrcidDatasets.java | 167 ++---------------- .../doiboost/orcid/SparkUpdateOrcidWorks.java | 27 ++- .../doiboost/orcid/SummariesDecompressor.java | 24 +-- .../doiboost/orcid/json/JsonHelper.java | 3 + .../dnetlib/doiboost/orcid/util/HDFSUtil.java | 37 ++-- .../doiboost/orcid/xml/XMLRecordParser.java | 54 +----- .../orcidnodoi/ActivitiesDumpReader.java | 18 +- .../orcidnodoi/GenOrcidAuthorWork.java | 6 +- .../SparkGenEnrichedOrcidWorks.java | 53 +++--- .../doiboost/orcidnodoi/json/JsonWriter.java | 3 + .../orcidnodoi/oaf/PublicationToOaf.java | 47 ++--- .../orcidnodoi/similarity/AuthorMatcher.java | 44 ++--- .../orcidnodoi/xml/XMLRecordParserNoDoi.java | 37 ++-- .../doiboost/orcid/OrcidClientTest.java | 25 +-- .../orcid/xml/XMLRecordParserTest.java | 42 +---- .../orcidnodoi/PublicationToOafTest.java | 23 +-- .../orcidnodoi/xml/OrcidNoDoiTest.java | 141 ++++++--------- .../eu/dnetlib/dhp/PropagationConstant.java | 11 +- .../dhp/bulktag/community/Community.java | 5 - .../community/CommunityConfiguration.java | 14 +- .../CommunityConfigurationFactory.java | 10 +- .../dhp/bulktag/community/Constraint.java | 3 - .../dhp/bulktag/community/Constraints.java | 14 +- .../dhp/bulktag/community/Provider.java | 8 - .../community/QueryInformationSystem.java | 3 +- .../dhp/bulktag/community/ResultTagger.java | 98 +++++----- .../community/SelectionConstraints.java | 3 - .../bulktag/community/TaggingConstants.java | 3 + .../bulktag/community/ZenodoCommunity.java | 1 - .../dhp/bulktag/criteria/Selection.java | 4 +- .../dhp/bulktag/criteria/VerbResolver.java | 8 +- .../bulktag/criteria/VerbResolverFactory.java | 3 + .../PrepareDatasourceCountryAssociation.java | 25 ++- .../PrepareResultCountrySet.java | 9 +- .../SparkCountryPropagationJob.java | 12 +- .../PrepareResultOrcidAssociationStep1.java | 3 - .../PrepareResultOrcidAssociationStep2.java | 2 +- .../SparkOrcidToResultFromSemRelJob.java | 36 ++-- .../PrepareProjectResultsAssociation.java | 7 +- .../SparkResultToProjectThroughSemRelJob.java | 44 ++--- .../PrepareResultCommunitySet.java | 11 +- ...kResultToCommunityFromOrganizationJob.java | 16 +- .../PrepareResultCommunitySetStep2.java | 3 +- ...parkResultToCommunityThroughSemRelJob.java | 10 +- .../PrepareResultInstRepoAssociation.java | 13 +- ...arkResultToOrganizationFromIstRepoJob.java | 58 +++--- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 16 +- .../CommunityConfigurationFactoryTest.java | 9 +- .../CountryPropagationJobTest.java | 27 ++- .../OrcidPropagationJobTest.java | 6 +- .../ProjectPropagationJobTest.java | 6 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToCommunityJobTest.java | 2 +- .../ResultToOrganizationJobTest.java | 6 +- .../dhp/oa/graph/clean/CleaningRuleMap.java | 2 +- .../dhp/oa/graph/clean/OafCleaner.java | 2 +- .../eu/dnetlib/dhp/oa/graph/dump/MakeTar.java | 13 +- .../oa/graph/dump/QueryInformationSystem.java | 9 +- .../dhp/oa/graph/dump/ResultMapper.java | 18 +- .../dhp/oa/graph/dump/SaveCommunityMap.java | 16 +- .../dhp/oa/graph/dump/SendToZenodoHDFS.java | 11 +- .../eu/dnetlib/dhp/oa/graph/dump/Utils.java | 5 +- 
.../graph/dump/community/CommunitySplit.java | 21 +-- .../community/SparkDumpCommunityProducts.java | 5 +- .../community/SparkPrepareResultProject.java | 27 ++- .../community/SparkUpdateProjectInfo.java | 6 +- .../dhp/oa/graph/dump/complete/Constants.java | 1 - .../dump/complete/CreateContextEntities.java | 21 ++- .../dump/complete/CreateContextRelation.java | 24 ++- .../dhp/oa/graph/dump/complete/Process.java | 4 +- .../dump/complete/QueryInformationSystem.java | 8 +- .../complete/SparkOrganizationRelation.java | 5 +- .../funderresults/SparkDumpFunderResults.java | 9 +- .../SparkResultLinkedToProject.java | 8 +- .../DatasourceCompatibilityComparator.java | 20 --- .../graph/merge/MergeGraphTableSparkJob.java | 14 +- .../raw/AbstractMdRecordToOafMapper.java | 24 +-- .../raw/GenerateEntitiesApplication.java | 10 +- .../oa/graph/raw/MergeClaimsApplication.java | 8 +- .../raw/MigrateDbEntitiesApplication.java | 25 +-- .../raw/MigrateHdfsMdstoresApplication.java | 14 +- .../raw/MigrateMongoMdstoresApplication.java | 4 +- .../dhp/oa/graph/raw/OafToOafMapper.java | 5 +- .../dhp/oa/graph/raw/OdfToOafMapper.java | 9 +- .../graph/raw/PatchRelationsApplication.java | 2 - .../common/AbstractMigrationApplication.java | 16 +- .../oa/graph/GraphHiveImporterJobTest.java | 2 +- .../clean/GraphCleaningFunctionsTest.java | 6 +- .../dhp/oa/graph/dump/GenerateJsonSchema.java | 4 +- .../dhp/oa/graph/dump/MakeTarTest.java | 2 +- .../dump/PrepareResultProjectJobTest.java | 131 +++++++------- .../dump/QueryInformationSystemTest.java | 9 +- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../oa/graph/dump/UpdateProjectInfoTest.java | 2 +- .../dhp/oa/graph/dump/ZenodoUploadTest.java | 6 +- .../graph/dump/complete/CreateEntityTest.java | 4 +- .../dump/complete/CreateRelationTest.java | 4 +- .../ExtractRelationFromEntityTest.java | 2 +- .../dump/complete/FunderParsingTest.java | 7 +- .../complete/QueryInformationSystemTest.java | 10 +- .../RelationFromOrganizationTest.java | 2 +- .../ResultLinkedToProjectTest.java | 4 +- .../dump/funderresult/SplitPerFunderTest.java | 2 +- .../merge/MergeGraphTableSparkJobTest.java | 4 +- .../raw/GenerateEntitiesApplicationTest.java | 4 +- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 108 ++++++----- .../raw/MigrateDbEntitiesApplicationTest.java | 22 ++- .../MigrateMongoMdstoresApplicationTest.java | 2 - .../raw/PatchRelationApplicationTest.java | 2 +- .../oa/graph/reflections/ReflectionTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 18 +- .../CreateRelatedEntitiesJob_phase2.java | 8 +- .../dhp/oa/provision/PrepareRelationsJob.java | 12 +- .../dhp/oa/provision/ProvisionConstants.java | 3 + .../oa/provision/SolrAdminApplication.java | 1 - .../dhp/oa/provision/XmlConverterJob.java | 7 +- .../dhp/oa/provision/XmlIndexingJob.java | 15 +- .../dhp/oa/provision/model/JoinedEntity.java | 1 - .../model/ProvisionModelSupport.java | 3 + .../dhp/oa/provision/model/RelatedEntity.java | 1 - .../utils/AuthorPidTypeComparator.java | 1 - .../dhp/oa/provision/utils/ContextMapper.java | 9 +- .../oa/provision/utils/GraphMappingUtils.java | 6 +- .../oa/provision/utils/ISLookupClient.java | 9 +- .../oa/provision/utils/LicenseComparator.java | 69 -------- .../utils/StreamingInputDocumentFactory.java | 79 +++++---- .../oa/provision/utils/TemplateFactory.java | 4 +- .../oa/provision/utils/XmlRecordFactory.java | 73 +++----- .../utils/XmlSerializationUtils.java | 29 ++- .../dhp/oa/provision/utils/ZkServers.java | 2 +- .../sx/provision/DropAndCreateESIndex.java | 1 + 
.../provision/SparkIndexCollectionOnES.java | 1 + .../provision/IndexRecordTransformerTest.java | 23 ++- .../oa/provision/PrepareRelationsJobTest.java | 19 +- .../provision/SolrAdminApplicationTest.java | 17 +- .../oa/provision/SortableRelationKeyTest.java | 1 - .../dhp/oa/provision/XmlIndexingJobTest.java | 4 +- .../oa/provision/XmlRecordFactoryTest.java | 20 +-- .../graph/usagerawdata/export/ConnectDB.java | 20 +-- .../usagestatsbuild/export/ConnectDB.java | 18 +- 309 files changed, 1874 insertions(+), 2497 deletions(-) delete mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java delete mode 100644 dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java index 10a25fdc3..a642dab70 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -8,8 +8,6 @@ import java.util.List; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; /** * Generates oozie properties which were not provided from commandline. @@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { }; @Override - public void execute() throws MojoExecutionException, MojoFailureException { + public void execute() { if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { String generatedSandboxName = generateSandboxName( @@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo { /** * Generates sandbox name from workflow source directory. * - * @param wfSourceDir + * @param wfSourceDir workflow source directory * @return generated sandbox name */ private String generateSandboxName(String wfSourceDir) { // utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); + List sandboxNameParts = new ArrayList<>(); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); ArrayUtils.reverse(tokens); if (tokens.length > 0) { for (String token : tokens) { for (String limiter : limiters) { if (limiter.equals(token)) { - return sandboxNameParts.size() > 0 + return !sandboxNameParts.isEmpty() ? 
StringUtils.join(sandboxNameParts.toArray()) : null; } } - if (sandboxNameParts.size() > 0) { + if (!sandboxNameParts.isEmpty()) { sandboxNameParts.add(0, File.separator); } sandboxNameParts.add(0, token); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java index d195ca86e..e3cdf5a22 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -16,6 +16,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected List getEscapeChars(String escapeChars) { List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); + List realTokens = new ArrayList<>(); for (String token : tokens) { String realToken = getRealToken(token); realTokens.add(realToken); @@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { * @return content */ protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); + List names = new ArrayList<>(properties.stringPropertyNames()); Collections.sort(names); StringBuilder sb = new StringBuilder(); if (!StringUtils.isBlank(comment)) { @@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo { throws MojoExecutionException { try { String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8); } catch (IOException e) { throw new MojoExecutionException("Error creating properties file", e); } @@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo { */ protected static final List getListFromCSV(String csv) { if (StringUtils.isBlank(csv)) { - return new ArrayList(); + return new ArrayList<>(); } - List list = new ArrayList(); + List list = new ArrayList<>(); String[] tokens = StringUtils.split(csv, ","); for (String token : tokens) { list.add(token.trim()); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java index 4bfcd3b33..2ff6bea30 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** @author mhorst, claudio.atzori */ -public class GenerateOoziePropertiesMojoTest { +class GenerateOoziePropertiesMojoTest { private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); @BeforeEach 
- public void clearSystemProperties() { + void clearSystemProperties() { System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); } @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { + void testExecuteSandboxNameAlreadySet() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String sandboxName = "originalSandboxName"; @@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { + void testExecuteEmptyWorkflowSourceDir() throws Exception { // given String workflowSourceDir = ""; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { + void testExecuteNullSandboxNameGenerated() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecute() throws Exception { + void testExecute() throws Exception { // given String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); @@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest { } @Test - public void testExecuteWithoutRoot() throws Exception { + void testExecuteWithoutRoot() throws Exception { // given String workflowSourceDir = "wf/transformers"; System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 0b3ea9653..84b962b4b 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension; /** @author mhorst, claudio.atzori */ @ExtendWith(MockitoExtension.class) -public class WritePredefinedProjectPropertiesTest { +class WritePredefinedProjectPropertiesTest { @Mock private MavenProject mavenProject; @@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest { // ----------------------------------- TESTS --------------------------------------------- @Test - public void testExecuteEmpty() throws Exception { + void testExecuteEmpty() throws Exception { // execute mojo.execute(); @@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectProperties() throws Exception { + void testExecuteWithProjectProperties() throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test() - public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { + void 
testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { + void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { + void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromBlankLocation() { + void testExecuteIncludingPropertyKeysFromBlankLocation() { // given String key = "projectPropertyKey"; String value = "projectPropertyValue"; @@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) + void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { // given String key = "projectPropertyKey"; @@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { + void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { // given mojo.setQuiet(true); mojo.setIncludePropertyKeysFromFiles(new String[] { @@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteIncludingPropertyKeysFromInvalidFile() { + void testExecuteIncludingPropertyKeysFromInvalidFile() { // given mojo.setIncludePropertyKeysFromFiles(new String[] { "invalid location" @@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { + void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { // given mojo.setIncludeEnvironmentVariables(true); @@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemProperties(@TempDir File testFolder) 
throws Exception { + void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey"; String value = "systemPropertyValue"; @@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest { } @Test - public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) + void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { // given String key = "systemPropertyKey "; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java deleted file mode 100644 index c53b83561..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ApplicationUtils.java +++ /dev/null @@ -1,14 +0,0 @@ - -package eu.dnetlib.dhp.application; - -import java.io.*; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.Maps; - -public class ApplicationUtils { - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java index 0429bc25d..72c1f6a5e 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/ArgumentApplicationParser.java @@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable { final StringWriter stringWriter = new StringWriter(); IOUtils.copy(gis, stringWriter); return stringWriter.toString(); - } catch (Throwable e) { - log.error("Wrong value to decompress:" + abstractCompressed); - throw new RuntimeException(e); + } catch (IOException e) { + log.error("Wrong value to decompress: {}", abstractCompressed); + throw new IllegalArgumentException(e); } } - public static String compressArgument(final String value) throws Exception { + public static String compressArgument(final String value) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); GZIPOutputStream gzip = new GZIPOutputStream(out); gzip.write(value.getBytes()); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java index 7004112e4..f34326d67 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/application/OptionsParameter.java @@ -9,9 +9,6 @@ public class OptionsParameter { private boolean paramRequired; private boolean compressed; - public OptionsParameter() { - } - public String getParamName() { return paramName; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java index 12937a197..fbbbffcbb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/collection/ApiDescriptor.java @@ -34,7 +34,7 @@ public class ApiDescriptor { return params; } - public void setParams(final HashMap params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad47..8fab94e92 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -12,6 +12,9 @@ public class Constants { public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; + private Constants() { + } + static { accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("RESTRICTED", "c_16ec"); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java index 44599eb83..8ceee5c8a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/GraphResultMapper.java @@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable { if (oInst.isPresent()) { out .setInstance( - oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList())); } @@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setMaintitle(iTitle.get(0).getValue()); } @@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable { .stream() .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .collect(Collectors.toList()); - if (iTitle.size() > 0) { + if (!iTitle.isEmpty()) { out.setSubtitle(iTitle.get(0).getValue()); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java index 7dc0e4417..d0909642c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MakeTarArchive.java @@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { Path 
hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); - - return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream()); } private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) throws IOException { Path hdfsWritePath = new Path(outputPath); - FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + try (TarArchiveOutputStream ar = new TarArchiveOutputStream( + fileSystem.create(hdfsWritePath).getWrappedStream())) { - TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); + RemoteIterator iterator = fileSystem + .listFiles( + new Path(inputPath), true); - RemoteIterator fileStatusListIterator = fileSystem - .listFiles( - new Path(inputPath), true); + while (iterator.hasNext()) { + writeCurrentFile(fileSystem, dir_name, iterator, ar, 0); + } - while (fileStatusListIterator.hasNext()) { - writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); } - - ar.close(); } public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index 0bc782ccb..d06544ae1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.stream.StreamSupport; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; +import com.mongodb.client.FindIterable; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; @@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable { final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) - .map(r -> r.first()) + .map(FindIterable::first) .map(d -> d.getString("currentId")) .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); @@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable { if (!Iterables.contains(client.listDatabaseNames(), dbName)) { final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); log.warn(err); - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } return client.getDatabase(dbName); } @@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable { String.format("Missing collection '%s' in database '%s'", collName, db.getName())); log.warn(err); if (abortIfMissing) { - throw new RuntimeException(err); + throw new IllegalArgumentException(err); } else { return null; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index 
6e02ca614..91c6c1825 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -24,7 +24,6 @@ import com.google.common.hash.Hashing; */ public class PacePerson { - private static final String UTF8 = "UTF-8"; private List name = Lists.newArrayList(); private List surname = Lists.newArrayList(); private List fullname = Lists.newArrayList(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java index 1f267733d..3f5c6ad4a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/ZenodoAPIClient.java @@ -5,6 +5,9 @@ import java.io.*; import java.io.IOException; import java.util.concurrent.TimeUnit; +import org.apache.http.HttpHeaders; +import org.apache.http.entity.ContentType; + import com.google.gson.Gson; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; @@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable { this.deposition_id = deposition_id; } - public ZenodoAPIClient(String urlString, String access_token) throws IOException { + public ZenodoAPIClient(String urlString, String access_token) { this.urlString = urlString; this.access_token = access_token; @@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(bucket + "/" + file_name) - .addHeader("Content-Type", "application/zip") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .build(); @@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .put(body) .build(); @@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString + "/" + deposition_id + "/actions/newversion") - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .post(body) .build(); @@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(urlString) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); @@ -293,8 
+296,8 @@ public class ZenodoAPIClient implements Serializable { Request request = new Request.Builder() .url(url) - .addHeader("Content-Type", "application/json") // add request headers - .addHeader("Authorization", "Bearer " + access_token) + .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers + .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token) .get() .build(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java index c03762693..c14af55b6 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/Creator.java @@ -32,13 +32,13 @@ public class Creator { public static Creator newInstance(String name, String affiliation, String orcid) { Creator c = new Creator(); - if (!(name == null)) { + if (name != null) { c.name = name; } - if (!(affiliation == null)) { + if (affiliation != null) { c.affiliation = affiliation; } - if (!(orcid == null)) { + if (orcid != null) { c.orcid = orcid; } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java index c7428de7d..509f444b9 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/zenodo/File.java @@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo; import java.io.Serializable; -import net.minidev.json.annotate.JsonIgnore; - public class File implements Serializable { private String checksum; private String filename; private long filesize; private String id; - @JsonIgnore - // private Links links; - public String getChecksum() { return checksum; } @@ -46,13 +41,4 @@ public class File implements Serializable { this.id = id; } -// @JsonIgnore -// public Links getLinks() { -// return links; -// } -// -// @JsonIgnore -// public void setLinks(Links links) { -// this.links = links; -// } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java index 98dabf56a..af6926cc7 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/rest/DNetRestClient.java @@ -1,11 +1,11 @@ package eu.dnetlib.dhp.common.rest; +import java.io.IOException; import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpUriRequest; @@ -23,17 +23,20 @@ public class DNetRestClient { private static final ObjectMapper mapper = new ObjectMapper(); + private DNetRestClient() { + } + public static T doGET(final String url, Class clazz) throws Exception { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet, clazz); } - public static String doGET(final String url) throws Exception { + public static String doGET(final String url) throws IOException { final HttpGet httpGet = new HttpGet(url); return doHTTPRequest(httpGet); } - public static String doPOST(final String url, V objParam) throws Exception { + public static String doPOST(final String url, V objParam) throws IOException { final HttpPost httpPost = new HttpPost(url); if 
(objParam != null) { @@ -45,25 +48,25 @@ public class DNetRestClient { return doHTTPRequest(httpPost); } - public static T doPOST(final String url, V objParam, Class clazz) throws Exception { + public static T doPOST(final String url, V objParam, Class clazz) throws IOException { return mapper.readValue(doPOST(url, objParam), clazz); } - private static String doHTTPRequest(final HttpUriRequest r) throws Exception { - CloseableHttpClient client = HttpClients.createDefault(); + private static String doHTTPRequest(final HttpUriRequest r) throws IOException { + try (CloseableHttpClient client = HttpClients.createDefault()) { - log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); - log - .info( - "request headers: {}", - Arrays - .asList(r.getAllHeaders()) - .stream() - .map(h -> h.getName() + ":" + h.getValue()) - .collect(Collectors.joining(","))); + log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); + log + .info( + "request headers: {}", + Arrays + .asList(r.getAllHeaders()) + .stream() + .map(h -> h.getName() + ":" + h.getValue()) + .collect(Collectors.joining(","))); - CloseableHttpResponse response = client.execute(r); - return IOUtils.toString(response.getEntity().getContent()); + return IOUtils.toString(client.execute(r).getEntity().getContent()); + } } private static T doHTTPRequest(final HttpUriRequest r, Class clazz) throws Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java index a9daede8f..b3eb98d4f 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/Vocabulary.java @@ -46,7 +46,7 @@ public class Vocabulary implements Serializable { } public VocabularyTerm getTerm(final String id) { - return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); + return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null); } protected void addTerm(final String id, final String name) { @@ -81,7 +81,6 @@ public class Vocabulary implements Serializable { .ofNullable(getTermBySynonym(syn)) .map(term -> getTermAsQualifier(term.getId())) .orElse(null); - // .orElse(OafMapperUtils.unknown(getId(), getName())); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java index a89bb486f..d5f57849c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/vocabulary/VocabularyGroup.java @@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable { } vocs.addTerm(vocId, termId, termName); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable { final String syn = arr[2].trim(); vocs.addSynonyms(vocId, termId, syn); - // vocs.addSynonyms(vocId, termId, termId); } } @@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable { .getTerms() .values() .stream() - .map(t -> t.getId()) + .map(VocabularyTerm::getId) .collect(Collectors.toCollection(HashSet::new)); } @@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable { return Optional .ofNullable(vocId) .map(String::toLowerCase) - .map(id -> vocs.containsKey(id)) + .map(vocs::containsKey) 
.orElse(false); } private void addSynonyms(final String vocId, final String termId, final String syn) { String id = Optional .ofNullable(vocId) - .map(s -> s.toLowerCase()) + .map(String::toLowerCase) .orElseThrow( - () -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); + () -> new IllegalArgumentException( + String + .format( + "empty vocabulary id for [term:%s, synonym:%s]", termId, syn))); Optional .ofNullable(vocs.get(id)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java index f1107b4b8..c7a0b5f50 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/message/Message.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.message; import java.io.Serializable; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -10,8 +9,8 @@ public class Message implements Serializable { private static final long serialVersionUID = 401753881204524893L; - public static String CURRENT_PARAM = "current"; - public static String TOTAL_PARAM = "total"; + public static final String CURRENT_PARAM = "current"; + public static final String TOTAL_PARAM = "total"; private MessageType messageType; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index 7a8e55a6e..aea046203 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge; import java.text.Normalizer; import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; @@ -19,6 +18,9 @@ public class AuthorMerger { private static final Double THRESHOLD = 0.95; + private AuthorMerger() { + } + public static List merge(List> authors) { authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); @@ -36,7 +38,8 @@ public class AuthorMerger { public static List mergeAuthor(final List a, final List b, Double threshold) { int pa = countAuthorsPids(a); int pb = countAuthorsPids(b); - List base, enrich; + List base; + List enrich; int sa = authorsSize(a); int sb = authorsSize(b); @@ -62,7 +65,7 @@ public class AuthorMerger { // (if an Author has more than 1 pid, it appears 2 times in the list) final Map basePidAuthorMap = base .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -74,7 +77,7 @@ public class AuthorMerger { // (list of pid that are missing in the other list) final List> pidToEnrich = enrich .stream() - .filter(a -> a.getPid() != null && a.getPid().size() > 0) + .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .flatMap( a -> a .getPid() @@ -117,9 +120,9 @@ public class AuthorMerger { } public static String pidToComparableString(StructuredProperty pid) { - return (pid.getQualifier() != null - ? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" - : "") + final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() + : ""; + return (pid.getQualifier() != null ? classid : "") + (pid.getValue() != null ? 
pid.getValue().toLowerCase() : ""); } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java index 9ac0a0bf7..fd4c0191a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/parser/utility/VtdUtilityParser.java @@ -12,6 +12,9 @@ import com.ximpleware.VTDNav; /** Created by sandro on 9/29/16. */ public class VtdUtilityParser { + private VtdUtilityParser() { + } + public static List getTextValuesWithAttributes( final AutoPilot ap, final VTDNav vn, final String xpath, final List attributes) throws VtdException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 1d002ed7e..d8b1cded8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions { r .getAuthor() .stream() - .filter(a -> Objects.nonNull(a)) + .filter(Objects::nonNull) .filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .collect(Collectors.toList())); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java index c6a8fd5a7..720fe47fb 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtils.java @@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*; public class OafMapperUtils { + private OafMapperUtils() { + } + public static Oaf merge(final Oaf left, final Oaf right) { if (ModelSupport.isSubClass(left, OafEntity.class)) { return mergeEntities((OafEntity) left, (OafEntity) right); } else if (ModelSupport.isSubClass(left, Relation.class)) { ((Relation) left).mergeFrom((Relation) right); } else { - throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName()); } return left; } @@ -38,7 +41,7 @@ public class OafMapperUtils { } else if (ModelSupport.isSubClass(left, Project.class)) { left.mergeFrom(right); } else { - throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); + throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); } return left; } @@ -62,7 +65,7 @@ public class OafMapperUtils { public static List listKeyValues(final String... 
s) { if (s.length % 2 > 0) { - throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); + throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)"); } final List list = new ArrayList<>(); @@ -88,7 +91,7 @@ public class OafMapperUtils { .stream(values) .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -97,7 +100,7 @@ public class OafMapperUtils { .stream() .map(v -> field(v, info)) .filter(Objects::nonNull) - .filter(distinctByKey(f -> f.getValue())) + .filter(distinctByKey(Field::getValue)) .collect(Collectors.toList()); } @@ -342,10 +345,10 @@ public class OafMapperUtils { if (instanceList != null) { final Optional min = instanceList .stream() - .map(i -> i.getAccessright()) + .map(Instance::getAccessright) .min(new AccessRightComparator<>()); - final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); + final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new); if (StringUtils.isBlank(rights.getClassid())) { rights.setClassid(UNKNOWN); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java index 8d760a2cd..6a86f30df 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/DHPUtils.java @@ -34,6 +34,9 @@ public class DHPUtils { private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); + private DHPUtils() { + } + public static Seq toSeq(List list) { return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); } @@ -44,7 +47,7 @@ public class DHPUtils { md.update(s.getBytes(StandardCharsets.UTF_8)); return new String(Hex.encodeHex(md.digest())); } catch (final Exception e) { - System.err.println("Error creating id"); + log.error("Error creating id from {}", s); return null; } } @@ -53,33 +56,6 @@ public class DHPUtils { return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); } - public static String compressString(final String input) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - Base64OutputStream b64os = new Base64OutputStream(out)) { - GZIPOutputStream gzip = new GZIPOutputStream(b64os); - gzip.write(input.getBytes(StandardCharsets.UTF_8)); - gzip.close(); - return out.toString(); - } catch (Throwable e) { - return null; - } - } - - public static String decompressString(final String input) { - byte[] byteArray = Base64.decodeBase64(input.getBytes()); - int len; - try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); - ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { - byte[] buffer = new byte[1024]; - while ((len = gis.read(buffer)) != -1) { - bos.write(buffer, 0, len); - } - return bos.toString(); - } catch (Exception e) { - return null; - } - } - public static String getJPathString(final String jsonPath, final String json) { try { Object o = JsonPath.read(json, jsonPath); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java index b326c4159..8ae0bb5c3 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/ISLookupClientFactory.java @@ -18,13 +18,16 @@ public class ISLookupClientFactory { private static final 
int requestTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10; + private ISLookupClientFactory() { + } + public static ISLookUpService getLookUpService(final String isLookupUrl) { return getServiceStub(ISLookUpService.class, isLookupUrl); } @SuppressWarnings("unchecked") private static T getServiceStub(final Class clazz, final String endpoint) { - log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); + log.info("creating {} stub from {}", clazz.getName(), endpoint); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setAddress(endpoint); @@ -38,12 +41,10 @@ public class ISLookupClientFactory { log .info( - String - .format( - "setting connectTimeout to %s, requestTimeout to %s for service %s", - connectTimeout, - requestTimeout, - clazz.getCanonicalName())); + "setting connectTimeout to {}, requestTimeout to {} for service {}", + connectTimeout, + requestTimeout, + clazz.getCanonicalName()); policy.setConnectionTimeout(connectTimeout); policy.setReceiveTimeout(requestTimeout); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java index 9b00b908c..81f1b5142 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/AbstractExtensionFunction.java @@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException; public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { - public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; + public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public abstract String getName(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java index c7e311b02..1ea2b9f46 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/ExtractYear.java @@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || arguments.length == 0) { return new StringValue(""); } final Item item = arguments[0].head(); @@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction { for (String format : dateFormats) { try { c.setTime(new SimpleDateFormat(format).parse(s)); - String year = String.valueOf(c.get(Calendar.YEAR)); - return year; + return String.valueOf(c.get(Calendar.YEAR)); } catch (ParseException e) { } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java index 1b5f3c40d..3e5def9b5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/NormalizeDate.java @@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction { @Override public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { - if (arguments == null | arguments.length == 0) { + if (arguments == null || 
arguments.length == 0) { return new StringValue(BLANK); } String s = arguments[0].head().getStringValue(); diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java index 46ecafd0a..b46a415d8 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/PickFirst.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.utils.saxon; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import org.apache.commons.lang3.StringUtils; import net.sf.saxon.expr.XPathContext; @@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction { final String s1 = getValue(arguments[0]); final String s2 = getValue(arguments[1]); - return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); + final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : ""; + return new StringValue(value); } private String getValue(final Sequence arg) throws XPathException { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java index b85d866f1..61049d2e1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/utils/saxon/SaxonTransformerFactory.java @@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl; public class SaxonTransformerFactory { + private SaxonTransformerFactory() { + } + /** * Creates the index record transformer from the given XSLT * diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java index e14020830..1788239f2 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/application/ArgumentApplicationParserTest.java @@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import org.apache.commons.io.IOUtils; import org.junit.jupiter.api.Test; -public class ArgumentApplicationParserTest { +class ArgumentApplicationParserTest { @Test - public void testParseParameter() throws Exception { + void testParseParameter() throws Exception { final String jsonConfiguration = IOUtils .toString( this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java index 870943816..fa721d5e5 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/HdfsSupportTest.java @@ -21,13 +21,13 @@ public class HdfsSupportTest { class Remove { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); } @Test - public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { + void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { // when HdfsSupport.remove(tempDir.toString(), new Configuration()); @@ -36,7 +36,7 @@ public class HdfsSupportTest { } @Test - public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { + void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) 
throws IOException { // given Path file = Files.createTempFile(tempDir, "p", "s"); @@ -52,13 +52,13 @@ public class HdfsSupportTest { class ListFiles { @Test - public void shouldThrowARuntimeExceptionOnError() { + void shouldThrowARuntimeExceptionOnError() { // when assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); } @Test - public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { + void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java index 5ebd7213e..cb9ae2886 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/PacePersonTest.java @@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; -public class PacePersonTest { +class PacePersonTest { @Test - public void pacePersonTest1() { + void pacePersonTest1() { PacePerson p = new PacePerson("Artini, Michele", false); assertEquals("Artini", p.getSurnameString()); @@ -17,7 +17,7 @@ public class PacePersonTest { } @Test - public void pacePersonTest2() { + void pacePersonTest2() { PacePerson p = new PacePerson("Michele G. Artini", false); assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Michele G", p.getNameString()); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java index 2f01c0863..8fa966c2f 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/SparkSessionSupportTest.java @@ -18,7 +18,8 @@ public class SparkSessionSupportTest { class RunWithSparkSession { @Test - public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); @@ -37,7 +38,8 @@ public class SparkSessionSupportTest { } @Test - public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() + @SuppressWarnings("unchecked") + void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { // given SparkSession spark = mock(SparkSession.class); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java index 9ae9c33c2..2ccaed3e4 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/api/ZenodoAPIClientTest.java @@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @Disabled -public class ZenodoAPIClientTest { +class ZenodoAPIClientTest { private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String ACCESS_TOKEN = ""; @@ -22,7 +22,7 @@ public class ZenodoAPIClientTest { private final String depositionId = "674915"; @Test - public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { + void testUploadOldDeposition() 
throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); @@ -44,7 +44,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -67,7 +67,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionNewName() throws IOException, MissingConceptDoiException { + void testNewVersionNewName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -87,7 +87,7 @@ public class ZenodoAPIClientTest { } @Test - public void testNewVersionOldName() throws IOException, MissingConceptDoiException { + void testNewVersionOldName() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java index 9c4e62214..3a7a41a1b 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/oa/merge/AuthorMergerTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class AuthorMergerTest { +class AuthorMergerTest { private String publicationsBasePath; @@ -43,7 +43,7 @@ public class AuthorMergerTest { } @Test - public void mergeTest() { // used in the dedup: threshold set to 0.95 + void mergeTest() { // used in the dedup: threshold set to 0.95 for (List authors1 : authors) { System.out.println("List " + (authors.indexOf(authors1) + 1)); diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 8d519a93f..4068f0abb 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Result; import me.xuender.unidecode.Unidecode; -public class OafMapperUtilsTest { +class OafMapperUtilsTest { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -42,7 +42,7 @@ public class OafMapperUtilsTest { } @Test - public void testDateValidation() { + void testDateValidation() { assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); @@ -147,44 +147,46 @@ public class OafMapperUtilsTest { } @Test - public void testDate() { - System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); + void testDate() { + final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998"); + assertNotNull(date); + System.out.println(date); } @Test - public void testMergePubs() throws IOException { + void testMergePubs() throws IOException { Publication p1 = read("publication_1.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class); Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d2 = read("dataset_2.json", 
Dataset.class); - assertEquals(p1.getCollectedfrom().size(), 1); - assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); - assertEquals(d2.getCollectedfrom().size(), 1); + assertEquals(1, p1.getCollectedfrom().size()); + assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey()); + assertEquals(1, d2.getCollectedfrom().size()); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p1, d2) .getResulttype() - .getClassid() - .equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID)); + .getClassid()); - assertEquals(p2.getCollectedfrom().size(), 1); + assertEquals(1, p2.getCollectedfrom().size()); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertEquals(d1.getCollectedfrom().size(), 1); + assertEquals(1, d1.getCollectedfrom().size()); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); - assertTrue( + assertEquals( + ModelConstants.DATASET_RESULTTYPE_CLASSID, OafMapperUtils .mergeResults(p2, d1) .getResulttype() - .getClassid() - .equals(ModelConstants.DATASET_RESULTTYPE_CLASSID)); + .getClassid()); } protected HashSet cfId(List collectedfrom) { - return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); + return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new)); } protected T read(String filename, Class clazz) throws IOException { diff --git a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java index d1d1ada71..5743b0831 100644 --- a/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/scholexplorer/relation/RelationMapperTest.java @@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation; import org.junit.jupiter.api.Test; -public class RelationMapperTest { +class RelationMapperTest { @Test - public void testLoadRels() throws Exception { + void testLoadRels() throws Exception { RelationMapper relationMapper = RelationMapper.load(); relationMapper.keySet().forEach(System.out::println); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 5a80c0b53..088e618c7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; -import java.util.*; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Triple; import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; import org.dom4j.io.SAXReader; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import 
eu.dnetlib.actionmanager.rmi.ActionManagerException; -import eu.dnetlib.actionmanager.set.ActionManagerSet; -import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes; -import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import scala.Tuple2; public class ISClient implements Serializable { - private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); + private static final Logger log = LoggerFactory.getLogger(ISClient.class); private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; - private final ISLookUpService isLookup; + private final transient ISLookUpService isLookup; public ISClient(String isLookupUrl) { isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -63,7 +60,7 @@ public class ISClient implements Serializable { .map( sets -> sets .stream() - .map(set -> parseSetInfo(set)) + .map(ISClient::parseSetInfo) .filter(t -> ids.contains(t.getLeft())) .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) @@ -73,15 +70,17 @@ public class ISClient implements Serializable { } } - private Triple parseSetInfo(String set) { + private static Triple parseSetInfo(String set) { try { - Document doc = new SAXReader().read(new StringReader(set)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + Document doc = reader.read(new StringReader(set)); return Triple .of( doc.valueOf("//SET/@id"), doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@latest")); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { throw new IllegalStateException(e); } } @@ -99,7 +98,7 @@ public class ISClient implements Serializable { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; - log.debug("quering for service property: " + q); + log.debug("quering for service property: {}", q); try { final List value = isLookup.quickSearchProfile(q); return Iterables.getOnlyElement(value); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java index fbb072957..eccfa445c 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGet.java @@ -62,6 +62,7 @@ public class MergeAndGet { x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); } + @SuppressWarnings("unchecked") private static G selectNewerAndGet(G x, A y) { if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7893fcf8b..c5f252c97 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ 
b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); + @SuppressWarnings("unchecked") Class actionPayloadClazz = (Class) Class.forName(actionPayloadClassName); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); @@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob { return spark .read() .parquet(path) - .map((MapFunction) value -> extractPayload(value), Encoders.STRING()) + .map((MapFunction) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING()) .map( (MapFunction) value -> decodePayload(actionPayloadClazz, value), Encoders.bean(actionPayloadClazz)); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java index f51c697f4..62eec13d5 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/partition/PartitionActionSetsByPayloadTypeJobTest.java @@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest { private ISClient isClient; @Test - public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { + void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { // given Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path outputDir = workingDir.resolve("output"); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java index b2248d77a..4c88e9de3 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/MergeAndGetTest.java @@ -20,7 +20,7 @@ public class MergeAndGetTest { class MergeFromAndGetStrategy { @Test - public void shouldThrowForOafAndOaf() { + void shouldThrowForOafAndOaf() { // given Oaf a = mock(Oaf.class); Oaf b = mock(Oaf.class); @@ -33,7 +33,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndRelation() { + void shouldThrowForOafAndRelation() { // given Oaf a = mock(Oaf.class); Relation b = mock(Relation.class); @@ -46,7 +46,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafAndOafEntity() { + void shouldThrowForOafAndOafEntity() { // given Oaf a = mock(Oaf.class); OafEntity b = mock(OafEntity.class); @@ -59,7 +59,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOaf() { + void shouldThrowForRelationAndOaf() { // given Relation a = mock(Relation.class); Oaf b = mock(Oaf.class); @@ -72,7 +72,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -85,7 +85,7 @@ public 
class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForRelationAndRelation() { + void shouldBehaveProperlyForRelationAndRelation() { // given Relation a = mock(Relation.class); Relation b = mock(Relation.class); @@ -101,7 +101,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOaf() { + void shouldThrowForOafEntityAndOaf() { // given OafEntity a = mock(OafEntity.class); Oaf b = mock(Oaf.class); @@ -114,7 +114,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -127,7 +127,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { + void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { // given class OafEntitySub1 extends OafEntity { } @@ -145,7 +145,7 @@ public class MergeAndGetTest { } @Test - public void shouldBehaveProperlyForOafEntityAndOafEntity() { + void shouldBehaveProperlyForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); OafEntity b = mock(OafEntity.class); @@ -165,7 +165,7 @@ public class MergeAndGetTest { class SelectNewerAndGetStrategy { @Test - public void shouldThrowForOafEntityAndRelation() { + void shouldThrowForOafEntityAndRelation() { // given OafEntity a = mock(OafEntity.class); Relation b = mock(Relation.class); @@ -178,7 +178,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForRelationAndOafEntity() { + void shouldThrowForRelationAndOafEntity() { // given Relation a = mock(Relation.class); OafEntity b = mock(OafEntity.class); @@ -191,7 +191,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowForOafEntityAndResult() { + void shouldThrowForOafEntityAndResult() { // given OafEntity a = mock(OafEntity.class); Result b = mock(Result.class); @@ -204,7 +204,7 @@ public class MergeAndGetTest { } @Test - public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { + void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { // given // real types must be used because subclass-superclass resolution does not work for // mocks @@ -221,7 +221,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnLeftForOafEntityAndOafEntity() { + void shouldShouldReturnLeftForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(1L); @@ -238,7 +238,7 @@ public class MergeAndGetTest { } @Test - public void shouldShouldReturnRightForOafEntityAndOafEntity() { + void shouldShouldReturnRightForOafEntityAndOafEntity() { // given OafEntity a = mock(OafEntity.class); when(a.getLastupdatetimestamp()).thenReturn(2L); diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java index 79ab55e07..99ce961aa 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJobTest.java @@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest { class Main { @Test - public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { + void 
shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { // given Class rowClazz = Relation.class; Class actionPayloadClazz = OafEntity.class; @@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest { @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") - public void shouldPromoteActionPayloadForGraphTable( + void shouldPromoteActionPayloadForGraphTable( MergeAndGet.Strategy strategy, Class rowClazz, Class actionPayloadClazz) diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index 477e4b204..cbc1bfaba 100644 --- a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest { class JoinTableWithActionPayloadAndMerge { @Test - public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { + void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { // given class OafImpl extends Oaf { } @@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { + void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { // given String id0 = "id0"; String id1 = "id1"; @@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest { } @Test - public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { + void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { // given String id0 = "id0"; String id1 = "id1"; @@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest { class GroupTableByIdAndMerge { @Test - public void shouldRunProperly() { + void shouldRunProperly() { // given String id1 = "id1"; String id2 = "id2"; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java index 4b9fd33f4..a48b84a33 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/CollectAndSave.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result; public class CollectAndSave implements Serializable { private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CollectAndSave.class - .getResourceAsStream( - "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); + Objects + .requireNonNull( + 
CollectAndSave.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index cea8c2891..f178451c1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable { private static void prepareResults(SparkSession spark, String inputPath, String outputPath, String bipScorePath, Class inputClazz) { - final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD bipDeserializeJavaRDD = sc .textFile(bipScorePath) @@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable { return bs; }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); - System.out.println(bipScores.count()); - Dataset results = readPath(spark, inputPath, inputClazz); results.createOrReplaceTempView("result"); @@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable { ret.setId(value._2().getId()); return ret; }, Encoders.bean(BipScore.class)) - .groupByKey((MapFunction) value -> value.getId(), Encoders.STRING()) + .groupByKey((MapFunction) BipScore::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (k, it) -> { Result ret = new Result(); ret.setDataInfo(getDataInfo()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 9e852eb77..40cf5ee53 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -171,26 +171,23 @@ public class PrepareProgramme { } private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { - if (!a.getLanguage().equals("en")) { - if (b.getLanguage().equalsIgnoreCase("en")) { - a.setTitle(b.getTitle()); - a.setLanguage(b.getLanguage()); - } + if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) { + a.setTitle(b.getTitle()); + a.setLanguage(b.getLanguage()); } - if (StringUtils.isEmpty(a.getShortTitle())) { - if (!StringUtils.isEmpty(b.getShortTitle())) { - a.setShortTitle(b.getShortTitle()); - } + if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) { + a.setShortTitle(b.getShortTitle()); } return a; } + @SuppressWarnings("unchecked") private static List prepareClassification(JavaRDD h2020Programmes) { Object[] codedescription = h2020Programmes .map( value -> new Tuple2<>(value.getCode(), - new Tuple2(value.getTitle(), value.getShortTitle()))) + new Tuple2<>(value.getTitle(), value.getShortTitle()))) .collect() .toArray(); @@ -216,7 +213,7 @@ public class PrepareProgramme { String[] tmp = ent.split("\\."); if (tmp.length <= 2) { if 
(StringUtils.isEmpty(entry._2()._2())) { - map.put(entry._1(), new Tuple2(entry._2()._1(), entry._2()._1())); + map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1())); } else { map.put(entry._1(), entry._2()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index cecd537ba..b1a381415 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -29,7 +29,7 @@ import scala.Tuple2; */ public class PrepareProjects { - private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java index 2bba9fb60..2cc20cb15 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/ReadProjectsFromDB.java @@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient; */ public class ReadProjectsFromDB implements Closeable { - private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); + + private static final String query = "SELECT code " + + "from projects where id like 'corda__h2020%' "; + + private final DbClient dbClient; private final Configuration conf; private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final static String query = "SELECT code " + - "from projects where id like 'corda__h2020%' "; - public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { - final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); + final Consumer consumer = rs -> producer.apply(rs).forEach(this::writeProject); dbClient.processResults(sql, consumer); } @@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable { public ReadProjectsFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.conf = new Configuration(); this.conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(this.conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, 
StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } @Override diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index fdc12c662..a4a0bf6a4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -31,6 +31,7 @@ import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Programme; +import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; @@ -47,13 +48,10 @@ import scala.Tuple2; * * To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more * than one programme. - * - * */ public class SparkAtomicActionJob { private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); public static void main(String[] args) throws Exception { @@ -137,7 +135,6 @@ public class SparkAtomicActionJob { h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); - // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); return pp; @@ -152,20 +149,16 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); - if (op.isPresent()) { - rp.setH2020topicdescription(op.get().getTitle()); - } + op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle())); return rp; }, Encoders.bean(Project.class)) .filter(Objects::nonNull) .groupByKey( - (MapFunction) p -> p.getId(), + (MapFunction) OafEntity::getId, Encoders.STRING()) .mapGroups((MapGroupsFunction) (s, it) -> { Project first = it.next(); - it.forEachRemaining(p -> { - first.mergeFrom(p); - }); + it.forEachRemaining(first::mergeFrom); return first; }, Encoders.bean(Project.class)) .toJavaRDD() @@ -189,12 +182,6 @@ public class SparkAtomicActionJob { h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); } -// private static void setProgramme(H2020Classification h2020Classification, String classification) { -// String[] tmp = classification.split(" \\| "); -// -// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); -// } - public static Dataset readPath( SparkSession spark, String inputPath, Class clazz) { return spark diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java index 8bdce903b..e2d8c528e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java @@ -27,12 +27,14 @@ public class CSVParser { final Set headers = parser.getHeaderMap().keySet(); Class clazz = Class.forName(classForName); for (CSVRecord csvRecord : parser.getRecords()) { - final Object cc = clazz.newInstance(); + + @SuppressWarnings("unchecked") + final R cc = (R) clazz.newInstance(); for (String header : headers) { FieldUtils.writeField(cc, header, csvRecord.get(header), true); } - ret.add((R) cc); + ret.add(cc); } return ret; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5f5b61d8b..5ce730692 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -26,52 +26,52 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - OPCPackage pkg = OPCPackage.open(file); - XSSFWorkbook wb = new XSSFWorkbook(pkg); + try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) { - XSSFSheet sheet = wb.getSheet(sheetName); - - if (sheetName == null) { - throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); - } - - List ret = new ArrayList<>(); - - DataFormatter dataFormatter = new DataFormatter(); - Iterator rowIterator = sheet.rowIterator(); - List headers = new ArrayList<>(); - int count = 0; - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - if (count == 0) { - Iterator cellIterator = row.cellIterator(); - - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - headers.add(dataFormatter.formatCellValue(cell)); - } - } else { - Class clazz = Class.forName(classForName); - final Object cc = clazz.newInstance(); - - for (int i = 0; i < headers.size(); i++) { - Cell cell = row.getCell(i); - FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); - - } - - EXCELTopic et = (EXCELTopic) cc; - if (StringUtils.isNotBlank(et.getRcn())) { - ret.add((R) cc); - } + XSSFSheet sheet = wb.getSheet(sheetName); + if (sheetName == null) { + throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } - count += 1; - } + List ret = new ArrayList<>(); - return ret; + DataFormatter dataFormatter = new DataFormatter(); + Iterator rowIterator = sheet.rowIterator(); + List headers = new ArrayList<>(); + int count = 0; + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + if (count == 0) { + Iterator cellIterator = row.cellIterator(); + + while (cellIterator.hasNext()) { + Cell cell = cellIterator.next(); + headers.add(dataFormatter.formatCellValue(cell)); + } + } else { + Class clazz = Class.forName(classForName); + final Object cc = clazz.newInstance(); + + for (int i = 0; i < headers.size(); i++) { + Cell cell = row.getCell(i); + FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); + + } + + EXCELTopic et = (EXCELTopic) cc; + if (StringUtils.isNotBlank(et.getRcn())) { + ret.add((R) cc); + } + + } + + count += 1; + } + + return ret; + } } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index c73f7ec3d..b59bd9dbc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -25,7 +25,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; */ public class ReadCSV implements Closeable { private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final String csvFile; @@ -49,17 +49,16 @@ public class ReadCSV implements Closeable { log.info("Getting CSV file..."); readCSV.execute(classForName); - } } - public void execute(final String classForName) throws Exception { + public void execute(final String classForName) + throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { CSVParser csvParser = new CSVParser(); csvParser .parse(csvFile, classForName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -72,18 +71,18 @@ public class ReadCSV implements Closeable { final String hdfsNameNode, final String fileURL) throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.csvFile = httpConnector.getInputSource(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 5ce0a681c..330779e24 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs */ public class ReadExcel implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - private final Configuration conf; + private static final Log log = LogFactory.getLog(ReadExcel.class); + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private 
final InputStream excelFile; @@ -51,13 +53,15 @@ public class ReadExcel implements Closeable { } } - public void execute(final String classForName, final String sheetName) throws Exception { + public void execute(final String classForName, final String sheetName) + throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException, + InstantiationException { + EXCELParser excelParser = new EXCELParser(); excelParser .parse(excelFile, classForName, sheetName) .stream() - .forEach(p -> write(p)); - + .forEach(this::write); } @Override @@ -68,20 +72,20 @@ public class ReadExcel implements Closeable { public ReadExcel( final String hdfsPath, final String hdfsNameNode, - final String fileURL) - throws Exception { - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); + final String fileURL) throws CollectorException, IOException { + + final Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); HttpConnector2 httpConnector = new HttpConnector2(); - FileSystem fileSystem = FileSystem.get(this.conf); + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { fileSystem.delete(hdfsWritePath, false); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); - this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); this.excelFile = httpConnector.getInputSourceAsStream(fileURL); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java index d6c17e415..869e1cb68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJob.java @@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; +import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; @@ -74,7 +75,7 @@ public class GenerateRorActionSetJob { final String jsonConfiguration = IOUtils .toString( - SparkAtomicActionJob.class + GenerateRorActionSetJob.class .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -108,7 +109,7 @@ public class GenerateRorActionSetJob { private static void processRorOrganizations(final SparkSession spark, final String inputPath, - final String outputPath) throws Exception { + final String outputPath) throws IOException { readInputPath(spark, inputPath) .map( @@ -203,7 +204,7 @@ public class GenerateRorActionSetJob { private static Dataset readInputPath( final SparkSession spark, - final String path) throws Exception { + final String path) throws IOException { try (final FileSystem fileSystem = FileSystem.get(new Configuration()); final InputStream is = fileSystem.open(new Path(path))) { diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java index b566a5501..a5acea5ae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Address.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Address implements Serializable { + private static final long serialVersionUID = 2444635485253443195L; + @JsonProperty("lat") private Float lat; @@ -37,8 +39,6 @@ public class Address implements Serializable { @JsonProperty("line") private String line; - private final static long serialVersionUID = 2444635485253443195L; - public Float getLat() { return lat; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java index 3dab60a9f..1e7621f98 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Country.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Country implements Serializable { + private static final long serialVersionUID = 4357848706229493627L; + @JsonProperty("country_code") private String countryCode; @JsonProperty("country_name") private String countryName; - private final static long serialVersionUID = 4357848706229493627L; - public String getCountryCode() { return countryCode; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java index 406bfd82c..5ea419b4e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdType.java @@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable { private String preferred; - private final static long serialVersionUID = 2616688352998387611L; + private static final long serialVersionUID = 2616688352998387611L; public ExternalIdType() { } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java index 3fd0c9250..a744a325f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/ExternalIdTypeDeserializer.java @@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode; public class ExternalIdTypeDeserializer extends JsonDeserializer { @Override - public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) - throws IOException, JsonProcessingException { + public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException { final ObjectCodec oc = p.getCodec(); final JsonNode node = oc.readTree(p); diff --git 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java index 9616db447..9317a777c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesAdmin.java @@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable { @JsonProperty("code") private String code; - private final static long serialVersionUID = 7294958526269195673L; + private static final long serialVersionUID = 7294958526269195673L; public String getAsciiName() { return asciiName; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java index 2b0487168..b13d64b10 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/GeonamesCity.java @@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable { @JsonProperty("license") private License license; - private final static long serialVersionUID = -8389480201526252955L; + private static final long serialVersionUID = -8389480201526252955L; public NameAndCode getNutsLevel2() { return nutsLevel2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java index 61eb0339d..9a2cb39e3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Label.java @@ -13,7 +13,7 @@ public class Label implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = -6576156103297850809L; + private static final long serialVersionUID = -6576156103297850809L; public String getIso639() { return iso639; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java index bdc8f4c42..a0f6cf774 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/License.java @@ -13,7 +13,7 @@ public class License implements Serializable { @JsonProperty("license") private String license; - private final static long serialVersionUID = -194308261058176439L; + private static final long serialVersionUID = -194308261058176439L; public String getAttribution() { return attribution; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java index 61d7eb8e6..c0f5d7645 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/NameAndCode.java @@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; 
public class NameAndCode implements Serializable { + private static final long serialVersionUID = 5459836979206140843L; + @JsonProperty("name") private String name; @JsonProperty("code") private String code; - private final static long serialVersionUID = 5459836979206140843L; - public String getName() { return name; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java index 8b73db98f..db9f96445 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/Relationship.java @@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class Relationship implements Serializable { + private static final long serialVersionUID = 7847399503395576960L; + @JsonProperty("type") private String type; @@ -16,8 +18,6 @@ public class Relationship implements Serializable { @JsonProperty("label") private String label; - private final static long serialVersionUID = 7847399503395576960L; - public String getType() { return type; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java index 94de34fee..b8041cfdf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/ror/model/RorOrganization.java @@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class RorOrganization implements Serializable { + private static final long serialVersionUID = -2658312087616043225L; + @JsonProperty("ip_addresses") private List ipAddresses = new ArrayList<>(); @@ -59,8 +61,6 @@ public class RorOrganization implements Serializable { @JsonProperty("status") private String status; - private final static long serialVersionUID = -2658312087616043225L; - public List getIpAddresses() { return ipAddresses; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java index c822a6723..8e46ab92b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java @@ -11,8 +11,6 @@ import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.utils.DHPUtils; @@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap implements C private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); - private MessageSender messageSender; + private transient MessageSender messageSender; public AggregatorReport() { } - public AggregatorReport(MessageSender messageSender) throws IOException { + public AggregatorReport(MessageSender messageSender) { this.messageSender = messageSender; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 9926f1688..549169673 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -22,7 +22,7 @@ public abstract class ReportingJob { protected final AggregatorReport report; - public ReportingJob(AggregatorReport report) { + protected ReportingJob(AggregatorReport report) { this.report = report; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java index 65e7805d8..bab44a3b1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/mdstore/MDStoreActionNode.java @@ -25,7 +25,7 @@ public class MDStoreActionNode { NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK } - public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; + public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; @@ -70,7 +70,7 @@ public class MDStoreActionNode { if (StringUtils.isBlank(hdfsuri)) { throw new IllegalArgumentException("missing or empty argument namenode"); } - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { @@ -94,7 +94,7 @@ public class MDStoreActionNode { break; } case ROLLBACK: { - final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); + final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); if (StringUtils.isBlank(mdStoreVersion.getId())) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 23ee3e2c6..d0872da1d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -116,7 +116,7 @@ public class CollectorWorker extends ReportingJob { final CollectorPlugin.NAME.OTHER_NAME plugin = Optional .ofNullable(api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) - .get(); + .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java index f6fdc266e..f1f74b09e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java @@ 
-207,6 +207,7 @@ public class GenerateNativeStoreSparkJob { totalItems.add(1); try { SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Node node = document.selectSingleNode(xpath); final String originalIdentifier = node.getText(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a61e2032c..8493a3436 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -32,7 +32,7 @@ public class HttpConnector2 { private String responseType = null; - private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; public HttpConnector2() { this(new HttpClientParams()); @@ -120,7 +120,7 @@ public class HttpConnector2 { if (is3xx(urlConn.getResponseCode())) { // REDIRECTS final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); - log.info(String.format("The requested url has been moved to %s", newUrl)); + log.info("The requested url has been moved to {}", newUrl); report .put( REPORT_PREFIX + urlConn.getResponseCode(), @@ -140,14 +140,14 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - requestUrl + " - waiting and repeating request after suggested retry-after " - + retryAfter + " sec."); + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); backoffAndSleep(retryAfter * 1000); } else { log .warn( - requestUrl + " - waiting and repeating request after default delay of " - + getClientParams().getRetryDelay() + " sec."); + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); @@ -181,12 +181,12 @@ public class HttpConnector2 { } private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: " + urlConn.getResponseMessage()); + log.debug("StatusCode: {}", urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: " + e.getKey() + " - value: " + v); + log.debug(" key: {} - value: {}", e.getKey(), v); } } } @@ -204,7 +204,7 @@ public class HttpConnector2 { private int obtainRetryAfter(final Map> headerMap) { for (String key : headerMap.keySet()) { - if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index a27314983..549c59720 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -11,8 +11,6 @@ import org.bson.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mongodb.MongoClient; -import com.mongodb.MongoClientURI; import com.mongodb.client.MongoCollection; import eu.dnetlib.dhp.aggregation.common.AggregatorReport; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java index 3199af5b7..ec5bab448 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java @@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public static final String PATH_PARAM = "path"; public static final String BODY_JSONPATH = "$.body"; - public FileSystem fileSystem; + private final FileSystem fileSystem; public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { this.fileSystem = fileSystem; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 75dd746ea..331dee6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin.oai; import java.io.IOException; -import java.io.StringReader; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; @@ -16,7 +15,6 @@ import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.dom4j.io.OutputFormat; -import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,7 +28,8 @@ public class OaiIterator implements Iterator { private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); - private final static String REPORT_PREFIX = "oai:"; + private static final String REPORT_PREFIX = "oai:"; + public static final String UTF_8 = "UTF-8"; private final Queue queue = new PriorityBlockingQueue<>(); @@ -68,7 +67,7 @@ public class OaiIterator implements Iterator { try { this.token = firstPage(); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } } @@ -90,7 +89,7 @@ public class OaiIterator implements Iterator { try { token = otherPages(token); } catch (final CollectorException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } } return res; @@ -99,23 +98,24 @@ public class OaiIterator implements Iterator { @Override public void remove() { + throw new UnsupportedOperationException(); } private String firstPage() throws CollectorException { try { - String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); + String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8); if (set != null && 
!set.isEmpty()) { - url += "&set=" + URLEncoder.encode(set, "UTF-8"); + url += "&set=" + URLEncoder.encode(set, UTF_8); } if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); + url += "&from=" + URLEncoder.encode(fromDate, UTF_8); } if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { - url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); + url += "&until=" + URLEncoder.encode(untilDate, UTF_8); } - log.info("Start harvesting using url: " + url); + log.info("Start harvesting using url: {}", url); return downloadPage(url); } catch (final UnsupportedEncodingException e) { @@ -143,7 +143,7 @@ public class OaiIterator implements Iterator { return downloadPage( baseUrl + "?verb=ListRecords&resumptionToken=" - + URLEncoder.encode(resumptionToken, "UTF-8")); + + URLEncoder.encode(resumptionToken, UTF_8)); } catch (final UnsupportedEncodingException e) { report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 764c21fc2..67ec22b46 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -131,7 +131,9 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { - transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); xpath = XPathFactory.newInstance().newXPath(); @@ -142,7 +144,7 @@ public class RestIterator implements Iterator { private void initQueue() { query = baseUrl + "?" + queryParams + querySize + queryFormat; - log.info("REST calls starting with " + query); + log.info("REST calls starting with {}", query); } private void disconnect() { @@ -174,7 +176,7 @@ public class RestIterator implements Iterator { try { query = downloadPage(query); } catch (CollectorException e) { - log.debug("CollectorPlugin.next()-Exception: " + e); + log.debug("CollectorPlugin.next()-Exception: {}", e); throw new RuntimeException(e); } } @@ -198,7 +200,7 @@ public class RestIterator implements Iterator { // check if cursor=* is initial set otherwise add it to the queryParam URL if (resumptionType.equalsIgnoreCase("deep-cursor")) { - log.debug("check resumptionType deep-cursor and check cursor=*?" 
+ query); + log.debug("check resumptionType deep-cursor and check cursor=*?{}", query); if (!query.contains("&cursor=")) { query += "&cursor=*"; } @@ -208,16 +210,16 @@ public class RestIterator implements Iterator { log.info("requestig URL [{}]", query); URL qUrl = new URL(query); - log.debug("authMethod :" + authMethod); + log.debug("authMethod: {}", authMethod); if ("bearer".equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestMethod("GET"); theHttpInputStream = conn.getInputStream(); } else if (BASIC.equalsIgnoreCase(this.authMethod)) { - log.trace("authMethod before inputStream: " + resultXml); + log.trace("authMethod before inputStream: {}", resultXml); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); @@ -237,13 +239,13 @@ public class RestIterator implements Iterator { if (!(emptyXml).equalsIgnoreCase(resultXml)) { resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); - log.debug("nodeList.length: " + nodeList.getLength()); + log.debug("nodeList.length: {}", nodeList.getLength()); for (int i = 0; i < nodeList.getLength(); i++) { StringWriter sw = new StringWriter(); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); String toEnqueue = sw.toString(); if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { - log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); + log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml); } else { recordQueue.add(sw.toString()); } @@ -274,9 +276,9 @@ public class RestIterator implements Iterator { String[] resumptionKeyValue = arrayUrlArgStr.split("="); if (isInteger(resumptionKeyValue[1])) { urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); - log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); + log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize); } else { - log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); + log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]); } } } @@ -295,7 +297,7 @@ public class RestIterator implements Iterator { discoverResultSize += nodeList.getLength(); } } - log.info("discoverResultSize: {}", discoverResultSize); + log.info("discoverResultSize: {}", discoverResultSize); break; case "pagination": diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index c7201a267..a01703675 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -67,10 +67,10 @@ public class TransformSparkJobNode { 
log.info("outputBasePath: {}", outputBasePath); final String isLookupUrl = parser.get("isLookupUrl"); - log.info(String.format("isLookupUrl: %s", isLookupUrl)); + log.info("isLookupUrl: {}", isLookupUrl); final String dateOfTransformation = parser.get("dateOfTransformation"); - log.info(String.format("dateOfTransformation: %s", dateOfTransformation)); + log.info("dateOfTransformation: {}", dateOfTransformation); final Integer rpt = Optional .ofNullable(parser.get("recordsPerTask")) @@ -129,9 +129,9 @@ public class TransformSparkJobNode { .map((Function) x::call); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); - log.info("Transformed item " + ct.getProcessedItems().count()); - log.info("Total item " + ct.getTotalItems().count()); - log.info("Transformation Error item " + ct.getErrorItems().count()); + log.info("Transformed item {}", ct.getProcessedItems().count()); + log.info("Total item {}", ct.getTotalItems().count()); + log.info("Transformation Error item {}", ct.getErrorItems().count()); final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count(); writeHdfsFile( diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java index 096d0e289..e93f3b518 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformationFactory.java @@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.mdstore.MetadataRecord; import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class TransformationFactory { private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class); - public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') " + + + "where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]"; + + private TransformationFactory() { + } public static MapFunction getTransformationPlugin( final Map jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService) @@ -27,7 +33,7 @@ public class TransformationFactory { try { final String transformationPlugin = jobArgument.get("transformationPlugin"); - log.info("Transformation plugin required " + transformationPlugin); + log.info("Transformation plugin required {}", transformationPlugin); switch (transformationPlugin) { case "XSLT_TRANSFORM": { final String transformationRuleId = jobArgument.get("transformationRuleId"); @@ -38,7 +44,7 @@ public class TransformationFactory { final String transformationRule = queryTransformationRuleFromIS( transformationRuleId, isLookupService); - final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation")); + final long dateOfTransformation = 
Long.parseLong(jobArgument.get("dateOfTransformation")); return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation, vocabularies); @@ -46,7 +52,6 @@ public class TransformationFactory { default: throw new DnetTransformationException( "transformation plugin does not exists for " + transformationPlugin); - } } catch (Throwable e) { @@ -55,9 +60,9 @@ public class TransformationFactory { } private static String queryTransformationRuleFromIS(final String transformationRuleId, - final ISLookUpService isLookUpService) throws Exception { + final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException { final String query = String.format(TRULE_XQUERY, transformationRuleId); - System.out.println("asking query to IS: " + query); + log.info("asking query to IS: {}", query); List result = isLookUpService.quickSearchProfile(query); if (result == null || result.isEmpty()) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java index 9da0747e6..3d57b966f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/DateCleaner.java @@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt; import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI; import java.io.Serializable; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import net.sf.saxon.s9api.*; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java index e3d588858..1aa549c09 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/PersonCleaner.java @@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable { private static final Set particles = null; - public PersonCleaner() { - - } - private String normalize(String s) { s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD s = s.replaceAll("\\(.+\\)", ""); s = s.replaceAll("\\[.+\\]", ""); s = s.replaceAll("\\{.+\\}", ""); s = s.replaceAll("\\s+-\\s+", "-"); - -// s = s.replaceAll("[\\W&&[^,-]]", " "); - -// System.out.println("class Person: s: " + s); - -// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); s = s.replace("\\d", " "); s = s.replace("\\n", " "); @@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { s = s.replaceAll("\\s+", " "); if (s.contains(",")) { - // System.out.println("class Person: s: " + s); - String[] arr = s.split(","); if (arr.length == 1) { @@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } else if (arr.length > 1) { surname = splitTerms(arr[0]); firstname = splitTermsFirstName(arr[1]); -// System.out.println("class Person: surname: " + surname); -// System.out.println("class Person: firstname: " + firstname); - fullname.addAll(surname); 
fullname.addAll(firstname); } @@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini firstname = fullname.subList(0, lastInitialPosition + 1); - System.out.println("name: " + firstname); surname = fullname.subList(lastInitialPosition + 1, fullname.size()); } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI for (String term : fullname) { @@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable { } private List splitTerms(String s) { - if (particles == null) { - // particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); - } - List list = Lists.newArrayList(); for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { - // if (!particles.contains(part.toLowerCase())) { list.add(part); - - // } } return list; } @@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable { public String getNormalisedFullname() { return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : Joiner.on(" ").join(fullname); - // return isAccurate() ? - // Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : - // Joiner.on(" ").join(fullname); } public List getCapitalSurname() { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index 43291e5de..acf48ccc5 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -1,7 +1,6 @@ package eu.dnetlib.dhp.transformation.xslt; -import java.io.ByteArrayInputStream; import java.io.Serializable; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*; public class XSLTTransformationFunction implements MapFunction, Serializable { - public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform"; + public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform"; - private final static String DATASOURCE_ID_PARAM = "varDataSourceId"; + private static final String DATASOURCE_ID_PARAM = "varDataSourceId"; - private final static String DATASOURCE_NAME_PARAM = "varOfficialName"; + private static final String DATASOURCE_NAME_PARAM = "varOfficialName"; private final AggregationCounter aggregationCounter; @@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java index 7200d2896..f2158748b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.actionmanager.bipfinder; +import static org.junit.jupiter.api.Assertions.*; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -67,7 +69,7 @@ 
public class SparkAtomicActionScoreJobTest { } @Test - public void matchOne() throws Exception { + void matchOne() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -98,7 +100,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -129,7 +131,7 @@ public class SparkAtomicActionScoreJobTest { } @Test - public void matchOneWithTwo() throws Exception { + void matchOneWithTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -160,7 +162,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 1); + assertEquals(1, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); @@ -190,23 +192,21 @@ public class SparkAtomicActionScoreJobTest { List tmp_ds = execVerification.filter("id = 'influence'").select("value").collectAsList(); String tmp_influence = tmp_ds.get(0).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); tmp_influence = tmp_ds.get(1).getString(0); - Assertions - .assertTrue( - "1.47565045883e-08".equals(tmp_influence) || - "1.98956540239e-08".equals(tmp_influence)); + assertTrue( + "1.47565045883e-08".equals(tmp_influence) || + "1.98956540239e-08".equals(tmp_influence)); - Assertions.assertTrue(!tmp_ds.get(0).getString(0).equals(tmp_ds.get(1).getString(0))); + assertNotEquals(tmp_ds.get(1).getString(0), tmp_ds.get(0).getString(0)); } @Test - public void matchTwo() throws Exception { + void matchTwo() throws Exception { String bipScoresPath = getClass() .getResource("/eu/dnetlib/dhp/actionmanager/bipfinder/bip_scores.json") .getPath(); @@ -237,7 +237,7 @@ public class SparkAtomicActionScoreJobTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Publication) aa.getPayload())); - Assertions.assertTrue(tmp.count() == 2); + assertEquals(2, tmp.count()); Dataset verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(Publication.class)); verificationDataset.createOrReplaceTempView("publication"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java index da5beecf9..dd7e1910f 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java @@ -9,10 +9,10 @@ import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; -public class CSVParserTest { +class CSVParserTest { @Test - public void readProgrammeTest() 
throws Exception { + void readProgrammeTest() throws Exception { String programmecsv = IOUtils .toString( diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index b7155bc3a..6c309acb3 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -30,7 +30,7 @@ public class EXCELParserTest { } @Test - public void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, + void test1() throws CollectorException, IOException, InvalidFormatException, ClassNotFoundException, IllegalAccessException, InstantiationException { EXCELParser excelParser = new EXCELParser(); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 256dc0521..f128b5610 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -66,7 +66,7 @@ public class PrepareH2020ProgrammeTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { PrepareProgramme .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index 0db3485f5..f0f3532aa 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -66,7 +66,7 @@ public class PrepareProjectTest { } @Test - public void numberDistinctProjectTest() throws Exception { + void numberDistinctProjectTest() throws Exception { PrepareProjects .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java index 42e494681..a77dbace4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java @@ -69,7 +69,7 @@ public class SparkUpdateProjectTest { } @Test - public void numberDistinctProgrammeTest() throws Exception { + void numberDistinctProgrammeTest() throws Exception { SparkAtomicActionJob .main( new String[] { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java index f16901cb4..aa11f4ab5 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java +++ 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/ror/GenerateRorActionSetJobTest.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.actionmanager.ror; import java.io.FileInputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -30,7 +32,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(getClass().getResourceAsStream("ror_org.json")), RorOrganization.class); final Organization org = GenerateRorActionSetJob.convertRorOrg(r); - System.out.println(mapper.writeValueAsString(org)); + final String s = mapper.writeValueAsString(org); + Assertions.assertTrue(StringUtils.isNotBlank(s)); + System.out.println(s); } @Test @@ -39,7 +43,9 @@ class GenerateRorActionSetJobTest { .readValue(IOUtils.toString(new FileInputStream(local_file_path)), RorOrganization[].class); for (final RorOrganization r : arr) { - GenerateRorActionSetJob.convertRorOrg(r); + Organization o = GenerateRorActionSetJob.convertRorOrg(r); + Assertions.assertNotNull(o); + Assertions.assertTrue(StringUtils.isNotBlank(o.getId())); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java index afb6ae6a1..633a47379 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJobTest.java @@ -97,7 +97,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(1) - public void testGenerateNativeStoreSparkJobRefresh() throws Exception { + void testGenerateNativeStoreSparkJobRefresh() throws Exception { MDStoreVersion mdStoreV1 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_1.json"); FileUtils.forceMkdir(new File(mdStoreV1.getHdfsPath())); @@ -125,7 +125,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(2) - public void testGenerateNativeStoreSparkJobIncremental() throws Exception { + void testGenerateNativeStoreSparkJobIncremental() throws Exception { MDStoreVersion mdStoreV2 = prepareVersion("/eu/dnetlib/dhp/collection/mdStoreVersion_2.json"); FileUtils.forceMkdir(new File(mdStoreV2.getHdfsPath())); @@ -155,7 +155,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { @Test @Order(3) - public void testTransformSparkJob() throws Exception { + void testTransformSparkJob() throws Exception { setUpVocabulary(); @@ -206,7 +206,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testJSONSerialization() throws Exception { + void testJSONSerialization() throws Exception { final String s = IOUtils.toString(getClass().getResourceAsStream("mdStoreVersion_1.json")); System.out.println("s = " + s); final ObjectMapper mapper = new ObjectMapper(); @@ -217,7 +217,7 @@ public class GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testGenerationMetadataRecord() throws Exception { + void testGenerationMetadataRecord() throws Exception { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); @@ -236,7 +236,7 @@ public class 
GenerateNativeStoreSparkJobTest extends AbstractVocabularyTest { } @Test - public void testEquals() throws IOException { + void testEquals() throws IOException { final String xml = IOUtils.toString(this.getClass().getResourceAsStream("./record.xml")); final MetadataRecord record = GenerateNativeStoreSparkJob diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java index efe925175..f2b873e10 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java @@ -21,7 +21,7 @@ import eu.dnetlib.dhp.collection.HttpClientParams; * @author js, Andreas Czerniak * */ -public class RestCollectorPluginTest { +class RestCollectorPluginTest { private static final Logger log = LoggerFactory.getLogger(RestCollectorPluginTest.class); @@ -65,7 +65,7 @@ public class RestCollectorPluginTest { @Disabled @Test - public void test() throws CollectorException { + void test() throws CollectorException { AtomicInteger i = new AtomicInteger(0); final Stream stream = rcp.collect(api, new AggregatorReport()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java index b5ea5f069..f52f4632a 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/CollectorWorkerApplicationTests.java @@ -1,7 +1,7 @@ package eu.dnetlib.dhp.collector.worker; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -11,10 +11,10 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.collection.ApiDescriptor; @Disabled -public class CollectorWorkerApplicationTests { +class CollectorWorkerApplicationTests { @Test - public void testCollectionOAI() throws Exception { + void testCollectionOAI() throws Exception { final ApiDescriptor api = new ApiDescriptor(); api.setId("oai"); api.setProtocol("oai"); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java index 948a8f93b..7bd7baaea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java @@ -33,7 +33,7 @@ import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @ExtendWith(MockitoExtension.class) -public class TransformationJobTest extends AbstractVocabularyTest { +class TransformationJobTest extends AbstractVocabularyTest { private SparkConf sparkConf; @@ -49,7 +49,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Date cleaner") - public void testDateCleaner() throws Exception { + void testDateCleaner() throws Exception { 
DateCleaner dc = new DateCleaner(); assertEquals("1982-09-20", dc.clean("20/09/1982")); assertEquals("2002-09-20", dc.clean("20-09-2002")); @@ -60,7 +60,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Single XML using zenodo_tr XSLTTransformator") - public void testTransformSaxonHE() throws Exception { + void testTransformSaxonHE() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -79,7 +79,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform Inst.&Them.v4 record XML with zenodo_tr") - public void testTransformITGv4Zenodo() throws Exception { + void testTransformITGv4Zenodo() throws Exception { // We Set the input Record getting the XML from the classpath final MetadataRecord mr = new MetadataRecord(); @@ -97,7 +97,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_datarepo_datacite/oaiOpenAIRE") - public void testTransformMostlyUsedScript() throws Exception { + void testTransformMostlyUsedScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_datarepo_datacite.xsl"; @@ -119,7 +119,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test Transform record XML with xslt_cleaning_REST_OmicsDI") - public void testTransformRestScript() throws Exception { + void testTransformRestScript() throws Exception { String xslTransformationScript = ""; xslTransformationScript = "/eu/dnetlib/dhp/transform/scripts/xslt_cleaning_REST_OmicsDI.xsl"; @@ -140,7 +140,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main with oaiOpenaire_datacite (v4)") - public void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { + void transformTestITGv4OAIdatacite(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { @@ -203,7 +203,7 @@ public class TransformationJobTest extends AbstractVocabularyTest { @Test @DisplayName("Test TransformSparkJobNode.main") - public void transformTest(@TempDir Path testDir) throws Exception { + void transformTest(@TempDir Path testDir) throws Exception { try (SparkSession spark = SparkSession.builder().config(sparkConf).getOrCreate()) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 2caa66db4..38ffd28fe 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -7,6 +7,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -32,11 +33,11 @@ public class ReadBlacklistFromDB implements Closeable { private final DbClient dbClient; private static final Log log = LogFactory.getLog(ReadBlacklistFromDB.class); - private final Configuration conf; + private final BufferedWriter writer; private final ObjectMapper OBJECT_MAPPER = new 
ObjectMapper(); - private final static String query = "SELECT source_type, unnest(original_source_objects) as source, " + + private static final String QUERY = "SELECT source_type, unnest(original_source_objects) as source, " + "target_type, unnest(original_target_objects) as target, " + "relationship FROM blacklist WHERE status = 'ACCEPTED'"; @@ -60,12 +61,12 @@ public class ReadBlacklistFromDB implements Closeable { dbPassword)) { log.info("Processing blacklist..."); - rbl.execute(query, rbl::processBlacklistEntry); + rbl.execute(QUERY, rbl::processBlacklistEntry); } } - public void execute(final String sql, final Function> producer) throws Exception { + public void execute(final String sql, final Function> producer) { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); @@ -99,7 +100,7 @@ public class ReadBlacklistFromDB implements Closeable { return Arrays.asList(direct, inverse); - } catch (final Exception e) { + } catch (final SQLException e) { throw new RuntimeException(e); } } @@ -112,12 +113,14 @@ public class ReadBlacklistFromDB implements Closeable { public ReadBlacklistFromDB( final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) - throws Exception { + throws IOException { this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); - this.conf = new Configuration(); - this.conf.set("fs.defaultFS", hdfsNameNode); - FileSystem fileSystem = FileSystem.get(this.conf); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); FSDataOutputStream fsDataOutputStream = null; if (fileSystem.exists(hdfsWritePath)) { @@ -133,7 +136,7 @@ public class ReadBlacklistFromDB implements Closeable { try { writer.write(OBJECT_MAPPER.writeValueAsString(r)); writer.newLine(); - } catch (final Exception e) { + } catch (final IOException e) { throw new RuntimeException(e); } } diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index 91bcb9d1c..38ef63d27 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -114,10 +114,8 @@ public class SparkRemoveBlacklistedRelationJob { .map((MapFunction, Relation>) c -> { Relation ir = c._1(); Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; - } + if (obl.isPresent() && ir.equals(obl.get())) { + return null; } return ir; }, Encoders.bean(Relation.class)) diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 585848589..058ea271c 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -62,7 +62,7 @@ public class BlackListTest { } @Test - public void noRemoveTest() throws Exception { + void noRemoveTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -89,7 +89,7 @@ public class BlackListTest { } @Test - public void removeNoMergeMatchTest() throws 
Exception { + void removeNoMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { @@ -128,7 +128,7 @@ public class BlackListTest { } @Test - public void removeMergeMatchTest() throws Exception { + void removeMergeMatchTest() throws Exception { SparkRemoveBlacklistedRelationJob .main( new String[] { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java index 429eb7d11..584438d44 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/model/EventFactory.java @@ -9,19 +9,24 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.DateUtils; +import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; +import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; import eu.dnetlib.dhp.broker.oa.util.UpdateInfo; public class EventFactory { - private final static String PRODUCER_ID = "OpenAIRE"; + private static final String PRODUCER_ID = "OpenAIRE"; - private final static String[] DATE_PATTERNS = { + private static final String[] DATE_PATTERNS = { "yyyy-MM-dd" }; + private EventFactory() { + } + public static Event newBrokerEvent(final UpdateInfo updateInfo) { final Event res = new Event(); @@ -61,7 +66,7 @@ public class EventFactory { map.setTargetResultId(target.getOpenaireId()); final List titles = target.getTitles(); - if (titles.size() > 0) { + if (!titles.isEmpty()) { map.setTargetResultTitle(titles.get(0)); } @@ -70,8 +75,12 @@ public class EventFactory { map.setTargetDateofacceptance(date); } - map.setTargetSubjects(target.getSubjects().stream().map(s -> s.getValue()).collect(Collectors.toList())); - map.setTargetAuthors(target.getCreators().stream().map(a -> a.getFullname()).collect(Collectors.toList())); + map + .setTargetSubjects( + target.getSubjects().stream().map(OaBrokerTypedValue::getValue).collect(Collectors.toList())); + map + .setTargetAuthors( + target.getCreators().stream().map(OaBrokerAuthor::getFullname).collect(Collectors.toList())); // PROVENANCE INFO map.setTrust(updateInfo.getTrust()); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java index 89fc2e703..f0aa6491f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/CheckDuplictedIdsJob.java @@ -10,15 +10,11 @@ import org.apache.spark.sql.Encoder; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.TypedColumn; import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.broker.model.Event; import 
eu.dnetlib.dhp.broker.oa.util.ClusterUtils; @@ -88,8 +84,7 @@ class CountAggregator extends Aggregator, Tuple2 merge(final Tuple2 arg0, final Tuple2 arg1) { - final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); - return new Tuple2<>(s, arg0._2 + arg1._2); + return doMerge(arg0, arg1); } @Override @@ -99,6 +94,10 @@ class CountAggregator extends Aggregator, Tuple2 reduce(final Tuple2 arg0, final Tuple2 arg1) { + return doMerge(arg0, arg1); + } + + private Tuple2 doMerge(final Tuple2 arg0, final Tuple2 arg1) { final String s = StringUtils.defaultIfBlank(arg0._1, arg1._1); return new Tuple2<>(s, arg0._2 + arg1._2); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java index 05ff2aa38..0fbc763e0 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexEventSubsetJob.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa; +import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; @@ -98,7 +99,6 @@ public class IndexEventSubsetJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -114,11 +114,11 @@ public class IndexEventSubsetJob { log.info("*** Deleting old events"); final String message = deleteOldEvents(brokerApiBaseUrl, now - 1000); - log.info("*** Deleted events: " + message); + log.info("*** Deleted events: {}", message); } - private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldEvents(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/events/byCreationDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java index 80549e1ce..e8ef5dd3e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexNotificationsJob.java @@ -33,11 +33,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.broker.model.ConditionParams; -import eu.dnetlib.dhp.broker.model.Event; -import eu.dnetlib.dhp.broker.model.MappedFields; -import eu.dnetlib.dhp.broker.model.Notification; -import eu.dnetlib.dhp.broker.model.Subscription; +import eu.dnetlib.dhp.broker.model.*; import eu.dnetlib.dhp.broker.oa.util.ClusterUtils; import eu.dnetlib.dhp.broker.oa.util.NotificationGroup; import eu.dnetlib.dhp.broker.oa.util.SubscriptionUtils; @@ -89,9 +85,9 @@ public class IndexNotificationsJob { final List subscriptions = listSubscriptions(brokerApiBaseUrl); - log.info("Number of subscriptions: " + subscriptions.size()); + log.info("Number of subscriptions: {}", subscriptions.size()); - if (subscriptions.size() > 0) { + if (!subscriptions.isEmpty()) { final Encoder ngEncoder = Encoders.bean(NotificationGroup.class); 
final Encoder nEncoder = Encoders.bean(Notification.class); final Dataset notifications = ClusterUtils @@ -106,7 +102,6 @@ public class IndexNotificationsJob { .javaRDD(); final Map esCfg = new HashMap<>(); - // esCfg.put("es.nodes", "10.19.65.51, 10.19.65.52, 10.19.65.53, 10.19.65.54"); esCfg.put("es.index.auto.create", "false"); esCfg.put("es.nodes", indexHost); @@ -122,7 +117,7 @@ public class IndexNotificationsJob { log.info("*** Deleting old notifications"); final String message = deleteOldNotifications(brokerApiBaseUrl, startTime - 1000); - log.info("*** Deleted notifications: " + message); + log.info("*** Deleted notifications: {}", message); log.info("*** sendNotifications (emails, ...)"); sendNotifications(brokerApiBaseUrl, startTime - 1000); @@ -174,28 +169,28 @@ public class IndexNotificationsJob { return false; } - if (conditions.containsKey("targetDateofacceptance") && !conditions + if (conditions.containsKey("targetDateofacceptance") && conditions .get("targetDateofacceptance") .stream() - .anyMatch( + .noneMatch( c -> SubscriptionUtils .verifyDateRange(map.getTargetDateofacceptance(), c.getValue(), c.getOtherValue()))) { return false; } if (conditions.containsKey("targetResultTitle") - && !conditions + && conditions .get("targetResultTitle") .stream() - .anyMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifySimilar(map.getTargetResultTitle(), c.getValue()))) { return false; } if (conditions.containsKey("targetAuthors") - && !conditions + && conditions .get("targetAuthors") .stream() - .allMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { + .noneMatch(c -> SubscriptionUtils.verifyListSimilar(map.getTargetAuthors(), c.getValue()))) { return false; } @@ -207,7 +202,7 @@ public class IndexNotificationsJob { } - private static List listSubscriptions(final String brokerApiBaseUrl) throws Exception { + private static List listSubscriptions(final String brokerApiBaseUrl) throws IOException { final String url = brokerApiBaseUrl + "/api/subscriptions"; final HttpGet req = new HttpGet(url); @@ -222,7 +217,7 @@ public class IndexNotificationsJob { } } - private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws Exception { + private static String deleteOldNotifications(final String brokerApiBaseUrl, final long l) throws IOException { final String url = brokerApiBaseUrl + "/api/notifications/byDate/0/" + l; final HttpDelete req = new HttpDelete(url); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java index 380a689e4..0c74d8a6d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/IndexOnESJob.java @@ -7,6 +7,7 @@ import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; @@ -61,7 +62,7 @@ public class IndexOnESJob { final JavaRDD inputRdd = ClusterUtils .readPath(spark, eventsPath, Event.class) - .map(IndexOnESJob::eventAsJsonString, Encoders.STRING()) + .map((MapFunction) IndexOnESJob::eventAsJsonString, 
Encoders.STRING()) .javaRDD(); final Map esCfg = new HashMap<>(); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java index e061c0d3b..b5c891bb8 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PartitionEventsByDsIdJob.java @@ -77,9 +77,8 @@ public class PartitionEventsByDsIdJob { } log.info("validOpendoarIds: {}", validOpendoarIds); - runWithSparkSession(conf, isSparkSessionManaged, spark -> { - - ClusterUtils + runWithSparkSession( + conf, isSparkSessionManaged, spark -> ClusterUtils .readPath(spark, eventsPath, Event.class) .filter((FilterFunction) e -> StringUtils.isNotBlank(e.getMap().getTargetDatasourceId())) .filter((FilterFunction) e -> e.getMap().getTargetDatasourceId().startsWith(OPENDOAR_NSPREFIX)) @@ -92,9 +91,7 @@ public class PartitionEventsByDsIdJob { .partitionBy("group") .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(partitionPath); - - }); + .json(partitionPath)); renameSubDirs(partitionPath); } @@ -102,14 +99,14 @@ public class PartitionEventsByDsIdJob { private static void renameSubDirs(final String path) throws IOException { final FileSystem fs = FileSystem.get(new Configuration()); - log.info("** Renaming subdirs of " + path); + log.info("** Renaming subdirs of {}", path); for (final FileStatus fileStatus : fs.listStatus(new Path(path))) { if (fileStatus.isDirectory()) { final Path oldPath = fileStatus.getPath(); final String oldName = oldPath.getName(); if (oldName.contains("=")) { final Path newPath = new Path(path + "/" + StringUtils.substringAfter(oldName, "=")); - log.info(" * " + oldPath.getName() + " -> " + newPath.getName()); + log.info(" * {} -> {}", oldPath.getName(), newPath.getName()); fs.rename(oldPath, newPath); } } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java index 61ab5e250..2a247b7aa 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/PrepareRelatedDatasourcesJob.java @@ -105,7 +105,7 @@ public class PrepareRelatedDatasourcesJob { .filter((FilterFunction) r -> !ClusterUtils.isDedupRoot(r.getId())) .filter((FilterFunction) r -> r.getDataInfo().getDeletedbyinference()) .map( - (MapFunction) r -> DatasourceRelationsAccumulator.calculateTuples(r), + (MapFunction) DatasourceRelationsAccumulator::calculateTuples, Encoders.bean(DatasourceRelationsAccumulator.class)) .flatMap( (FlatMapFunction>) acc -> acc diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java index fba82aa8c..87fed7db7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcher.java @@ -26,7 +26,7 @@ public abstract class UpdateMatcher { private final BiConsumer compileHighlightFunction; private final Function highlightToStringFunction; - public 
UpdateMatcher(final int maxNumber, final Function topicFunction, + protected UpdateMatcher(final int maxNumber, final Function topicFunction, final BiConsumer compileHighlightFunction, final Function highlightToStringFunction) { this.maxNumber = maxNumber; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java index 2f73a2448..88ad48178 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedDatasets/AbstractEnrichMissingDataset.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingDataset extends UpdateMatcher { - public AbstractEnrichMissingDataset(final Topic topic) { + protected AbstractEnrichMissingDataset(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getDatasets().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedDataset::getOpenaireId); } protected abstract boolean filterByType(String relType); diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java index ab2735f2a..440602772 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMissingProject.java @@ -15,7 +15,7 @@ public class EnrichMissingProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MISSING_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java index 85086a6df..2e523da2f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedProjects/EnrichMoreProject.java @@ -18,7 +18,7 @@ public class EnrichMoreProject extends UpdateMatcher { super(20, prj -> Topic.ENRICH_MORE_PROJECT, (p, prj) -> p.getProjects().add(prj), - prj -> prj.getOpenaireId()); + OaBrokerProject::getOpenaireId); } @Override @@ -32,7 +32,7 @@ public class EnrichMoreProject extends UpdateMatcher { final Set existingProjects = target .getProjects() .stream() - .map(p -> p.getOpenaireId()) + .map(OaBrokerProject::getOpenaireId) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java index 7ba3e5e02..a709eea30 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedPublications/AbstractEnrichMissingPublication.java @@ -14,11 +14,11 @@ import eu.dnetlib.dhp.broker.oa.util.BrokerConstants; public abstract class AbstractEnrichMissingPublication extends UpdateMatcher { - public AbstractEnrichMissingPublication(final Topic topic) { + protected AbstractEnrichMissingPublication(final Topic topic) { super(10, rel -> topic, (p, rel) -> p.getPublications().add(rel), - rel -> rel.getOpenaireId()); + OaBrokerRelatedPublication::getOpenaireId); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java index a638024bc..a75666027 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMissingSoftware.java @@ -16,7 +16,7 @@ public class EnrichMissingSoftware super(10, s -> Topic.ENRICH_MISSING_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java index a6cd34359..ec340b42f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/relatedSoftware/EnrichMoreSoftware.java @@ -18,7 +18,7 @@ public class EnrichMoreSoftware extends UpdateMatcher { super(10, s -> Topic.ENRICH_MORE_SOFTWARE, (p, s) -> p.getSoftwares().add(s), - s -> s.getOpenaireId()); + OaBrokerRelatedSoftware::getOpenaireId); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java index e834d1dde..125eac862 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcid.java @@ -20,7 +20,7 @@ public class EnrichMissingAuthorOrcid extends UpdateMatcher { super(40, aut -> Topic.ENRICH_MISSING_AUTHOR_ORCID, (p, aut) -> p.getCreators().add(aut), - aut -> aut.getOrcid()); + OaBrokerAuthor::getOrcid); } @Override diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java index 4e4003890..f32cec90d 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPid.java @@ -23,7 +23,7 @@ public class 
EnrichMissingPid extends UpdateMatcher { protected List findDifferences(final OaBrokerMainEntity source, final OaBrokerMainEntity target) { - if (target.getPids().size() > 0) { + if (!target.getPids().isEmpty()) { return Arrays.asList(); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java index cb3ea5464..f07bbd52f 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingSubject.java @@ -35,7 +35,7 @@ public class EnrichMissingSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> subjectAsString(s)); + EnrichMissingSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMissingSubject extends UpdateMatcher { final Set existingSubject = target .getSubjects() .stream() - .map(s -> subjectAsString(s)) + .map(EnrichMissingSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java index 46f6fa80c..585531095 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreOpenAccess.java @@ -33,7 +33,7 @@ public class EnrichMoreOpenAccess extends UpdateMatcher { .getInstances() .stream() .filter(i -> i.getLicense().equals(BrokerConstants.OPEN_ACCESS)) - .map(i -> i.getUrl()) + .map(OaBrokerInstance::getUrl) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java index 609437b9d..a98b96b99 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMorePid.java @@ -18,7 +18,7 @@ public class EnrichMorePid extends UpdateMatcher { super(20, pid -> Topic.ENRICH_MORE_PID, (p, pid) -> p.getPids().add(pid), - pid -> pidAsString(pid)); + EnrichMorePid::pidAsString); } @Override @@ -32,7 +32,7 @@ public class EnrichMorePid extends UpdateMatcher { final Set existingPids = target .getPids() .stream() - .map(pid -> pidAsString(pid)) + .map(EnrichMorePid::pidAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java index 1f6edf96e..b62b509c7 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMoreSubject.java @@ -35,7 +35,7 @@ public class EnrichMoreSubject extends UpdateMatcher { } }, (p, s) -> p.getSubjects().add(s), - s -> 
subjectAsString(s)); + EnrichMoreSubject::subjectAsString); } @Override @@ -49,7 +49,7 @@ public class EnrichMoreSubject extends UpdateMatcher { final Set existingSubjects = target .getSubjects() .stream() - .map(pid -> subjectAsString(pid)) + .map(EnrichMoreSubject::subjectAsString) .collect(Collectors.toSet()); return source diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java index 2055a014e..790ca4e61 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/BrokerConstants.java @@ -12,6 +12,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class BrokerConstants { + private BrokerConstants() { + } + public static final String OPEN_ACCESS = "OPEN"; public static final String IS_MERGED_IN_CLASS = ModelConstants.IS_MERGED_IN; diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java index 7c4ca1d22..2e9c03990 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ClusterUtils.java @@ -24,6 +24,9 @@ public class ClusterUtils { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private ClusterUtils() { + } + public static void createDirIfMissing(final SparkSession spark, final String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java index 6f0a52244..bc37203d3 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/ConversionUtils.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -13,8 +14,6 @@ import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.common.base.Function; - import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; @@ -46,6 +45,9 @@ public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); + private ConversionUtils() { + } + public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); @@ -69,7 +71,7 @@ public class ConversionUtils { return sp != null ? 
new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } - public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { + public static OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } @@ -100,7 +102,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { + public static OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } @@ -142,12 +144,12 @@ public class ConversionUtils { final String pids = author.getPid() != null ? author .getPid() .stream() - .filter(pid -> pid != null) + .filter(Objects::nonNull) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) - .map(pid -> pid.getValue()) - .map(pid -> cleanOrcid(pid)) + .map(StructuredProperty::getValue) + .map(ConversionUtils::cleanOrcid) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; @@ -187,7 +189,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { + public static OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } @@ -206,14 +208,14 @@ public class ConversionUtils { res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { - log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); + log.error("Error in record {}: invalid fundingtree: {}", p.getId(), ftree); } } return res; } - public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { + public static OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } @@ -228,7 +230,7 @@ public class ConversionUtils { return res; } - public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { + public static OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } @@ -241,7 +243,7 @@ public class ConversionUtils { } private static String first(final List list) { - return list != null && list.size() > 0 ? list.get(0) : null; + return list != null && !list.isEmpty() ? 
list.get(0) : null; } private static String kvValue(final KeyValue kv) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java index c693be93c..658a42ac1 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/DatasourceRelationsAccumulator.java @@ -10,6 +10,8 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; +import eu.dnetlib.dhp.schema.oaf.Instance; +import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple3; @@ -39,7 +41,7 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set collectedFromSet = r .getCollectedfrom() .stream() - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .collect(Collectors.toSet()); @@ -47,10 +49,10 @@ public class DatasourceRelationsAccumulator implements Serializable { final Set hostedBySet = r .getInstance() .stream() - .map(i -> i.getHostedby()) + .map(Instance::getHostedby) .filter(Objects::nonNull) .filter(kv -> !StringUtils.equalsIgnoreCase(kv.getValue(), "Unknown Repository")) - .map(kv -> kv.getKey()) + .map(KeyValue::getKey) .filter(StringUtils::isNotBlank) .distinct() .filter(id -> !collectedFromSet.contains(id)) diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java index 103751f95..b2214e07e 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/EventFinder.java @@ -41,8 +41,6 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup; public class EventFinder { - private static final Logger log = LoggerFactory.getLogger(EventFinder.class); - private static final List> matchers = new ArrayList<>(); static { matchers.add(new EnrichMissingAbstract()); @@ -72,6 +70,9 @@ public class EventFinder { matchers.add(new EnrichMissingDatasetIsSupplementedBy()); } + private EventFinder() { + } + public static EventGroup generateEvents(final ResultGroup results, final Set dsIdWhitelist, final Set dsIdBlacklist, diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java index adb1c753b..cf3562193 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/SubscriptionUtils.java @@ -12,6 +12,9 @@ public class SubscriptionUtils { private static final long ONE_DAY = 86_400_000; + private SubscriptionUtils() { + } + public static boolean verifyListSimilar(final List list, final String value) { return list.stream().anyMatch(s -> verifySimilar(s, value)); } diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java index 72fe1b204..a49801f32 100644 --- 
a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/TrustUtils.java @@ -30,7 +30,9 @@ public class TrustUtils { } catch (final IOException e) { log.error("Error loading dedupConfig, e"); } + } + private TrustUtils() { } protected static float calculateTrust(final OaBrokerMainEntity r1, final OaBrokerMainEntity r2) { diff --git a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java index 5a9cb5e09..d29414e52 100644 --- a/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java +++ b/dhp-workflows/dhp-broker-events/src/main/java/eu/dnetlib/dhp/broker/oa/util/UpdateInfo.java @@ -88,14 +88,14 @@ public final class UpdateInfo { .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getName()) + .map(OaBrokerRelatedDatasource::getName) .findFirst() .orElse(""); final String provType = getSource() .getDatasources() .stream() .filter(ds -> ds.getRelType().equals(BrokerConstants.COLLECTED_FROM_REL)) - .map(ds -> ds.getType()) + .map(OaBrokerRelatedDatasource::getType) .findFirst() .orElse(""); diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java index 8fa95abe5..45bfc785f 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/UpdateMatcherTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; @@ -72,7 +73,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test @@ -127,7 +128,7 @@ class UpdateMatcherTest { final Collection> list = matcher .searchUpdatesForRecord(res, targetDs, Arrays.asList(p1, p2, p3, p4), null); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } } diff --git a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java index 77a19af4c..550ded9f4 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingPublicationDateTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.broker.oa.matchers.simple; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -32,7 +33,7 @@ class EnrichMissingPublicationDateTest { final OaBrokerMainEntity target = new OaBrokerMainEntity(); source.setPublicationdate("2018"); final List list = matcher.findDifferences(source, target); - assertTrue(list.size() == 1); + assertEquals(1, list.size()); } @Test diff --git 
a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java index 974baa28b..a8bc03e31 100644 --- a/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java +++ b/dhp-workflows/dhp-broker-events/src/test/java/eu/dnetlib/dhp/broker/oa/util/TrustUtilsTest.java @@ -9,67 +9,67 @@ import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerTypedValue; -public class TrustUtilsTest { +class TrustUtilsTest { private static final double THRESHOLD = 0.95; @Test - public void rescaleTest_1() { + void rescaleTest_1() { verifyValue(-0.3, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_2() { + void rescaleTest_2() { verifyValue(0.0, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_3() { + void rescaleTest_3() { verifyValue(0.5, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_4() { + void rescaleTest_4() { verifyValue(0.95, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_5() { + void rescaleTest_5() { verifyValue(0.96, BrokerConstants.MIN_TRUST); } @Test - public void rescaleTest_6() { + void rescaleTest_6() { verifyValue(0.97, 0.3f); } @Test - public void rescaleTest_7() { + void rescaleTest_7() { verifyValue(0.98, 0.45f); } @Test - public void rescaleTest_8() { + void rescaleTest_8() { verifyValue(0.99, 0.6f); } @Test - public void rescaleTest_9() { + void rescaleTest_9() { verifyValue(1.00, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_10() { + void rescaleTest_10() { verifyValue(1.01, BrokerConstants.MAX_TRUST); } @Test - public void rescaleTest_11() { + void rescaleTest_11() { verifyValue(2.00, BrokerConstants.MAX_TRUST); } @Test - public void test() throws Exception { + void test() { final OaBrokerMainEntity r1 = new OaBrokerMainEntity(); r1.getTitles().add("D-NET Service Package: Data Import"); r1.getPids().add(new OaBrokerTypedValue("doi", "123")); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 647a1b9c8..6a9b21b00 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -20,6 +20,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,23 +44,26 @@ abstract class AbstractSparkAction implements Serializable { protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - public ArgumentApplicationParser parser; // parameters for the spark action - public SparkSession spark; // the spark session + public final ArgumentApplicationParser parser; // parameters for the spark action + public final SparkSession spark; // the spark session - public AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { + protected AbstractSparkAction(ArgumentApplicationParser parser, SparkSession spark) { this.parser = parser; this.spark = spark; } public List 
getConfigurations(ISLookUpService isLookUpService, String orchestrator) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); @@ -93,7 +97,7 @@ abstract class AbstractSparkAction implements Serializable { } abstract void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException; + throws DocumentException, IOException, ISLookUpException, SAXException; protected static SparkSession getSparkSession(SparkConf conf) { return SparkSession.builder().config(conf).getOrCreate(); @@ -139,9 +143,7 @@ abstract class AbstractSparkAction implements Serializable { c -> c .stream() .filter(Objects::nonNull) - .filter(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue())) - .findFirst() - .isPresent()) + .anyMatch(kv -> ModelConstants.OPENORGS_NAME.equals(kv.getValue()))) .orElse(false); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java index 558c7c440..9d767c4d2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DatePicker.java @@ -23,6 +23,9 @@ public class DatePicker { private static final int YEAR_LB = 1300; private static final int YEAR_UB = Year.now().getValue() + 5; + private DatePicker() { + } + public static Field pick(final Collection dateofacceptance) { final Map frequencies = dateofacceptance @@ -61,7 +64,7 @@ public class DatePicker { .entrySet() .stream() .filter(e -> e.getValue() >= acceptThreshold) - .map(e -> e.getKey()) + .map(Map.Entry::getKey) .collect(Collectors.toList()); // cannot find strong majority diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 5ba6f6e6d..d65853aff 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -10,14 +10,11 @@ import org.apache.spark.api.java.function.MapGroupsFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -25,11 +22,12 @@ import scala.Tuple2; public class DedupRecordFactory { - private static final Logger log = 
LoggerFactory.getLogger(DedupRecordFactory.class); - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + private DedupRecordFactory() { + } + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, @@ -67,7 +65,7 @@ public class DedupRecordFactory { value._1()._1(), value._2()._2()), Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) .groupByKey( - (MapFunction, String>) entity -> entity._1(), Encoders.STRING()) + (MapFunction, String>) Tuple2::_1, Encoders.STRING()) .mapGroups( (MapGroupsFunction, T>) (key, values) -> entityMerger(key, values, ts, dataInfo, clazz), @@ -91,7 +89,7 @@ public class DedupRecordFactory { entity.mergeFrom(duplicate); if (ModelSupport.isSubClass(duplicate, Result.class)) { Result r1 = (Result) duplicate; - if (r1.getAuthor() != null && r1.getAuthor().size() > 0) + if (r1.getAuthor() != null && !r1.getAuthor().isEmpty()) authors.add(r1.getAuthor()); if (r1.getDateofacceptance() != null) dates.add(r1.getDateofacceptance().getValue()); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java index 5806e9fa4..d79d24653 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupUtility.java @@ -10,6 +10,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Sets; @@ -25,6 +26,9 @@ public class DedupUtility { public static final String OPENORGS_ID_PREFIX = "openorgs____"; public static final String CORDA_ID_PREFIX = "corda"; + private DedupUtility() { + } + public static Map constructAccumulator( final DedupConfig dedupConf, final SparkContext context) { @@ -92,14 +96,16 @@ public class DedupUtility { } public static List getConfigurations(String isLookUpUrl, String orchestrator) - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { final ISLookUpService isLookUpService = ISLookupClientFactory.getLookUpService(isLookUpUrl); final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", orchestrator); String orchestratorProfile = isLookUpService.getResourceProfileByQuery(xquery); - final Document doc = new SAXReader().read(new StringReader(orchestratorProfile)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(orchestratorProfile)); final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id"); final List configurations = new ArrayList<>(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 5506b5470..2cdc22153 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; 
+import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -33,9 +34,11 @@ public class DispatchEntitiesSparkJob { String jsonConfiguration = IOUtils .toString( - DispatchEntitiesSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json")); + Objects + .requireNonNull( + DispatchEntitiesSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/dispatch_entities_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java index 58009bfcf..a19f86380 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/GroupEntitiesSparkJob.java @@ -42,7 +42,7 @@ public class GroupEntitiesSparkJob { private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class); - private final static String ID_JPATH = "$.id"; + private static final String ID_JPATH = "$.id"; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @@ -92,7 +92,7 @@ public class GroupEntitiesSparkJob { spark .read() .textFile(toSeq(listEntityPaths(inputPath, sc))) - .map((MapFunction) s -> parseOaf(s), Encoders.kryo(OafEntity.class)) + .map((MapFunction) GroupEntitiesSparkJob::parseOaf, Encoders.kryo(OafEntity.class)) .filter((FilterFunction) e -> StringUtils.isNotBlank(ModelSupport.idFn().apply(e))) .groupByKey((MapFunction) oaf -> ModelSupport.idFn().apply(oaf), Encoders.STRING()) .agg(aggregator) @@ -188,7 +188,7 @@ public class GroupEntitiesSparkJob { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index dd9b16790..81cd30f88 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -15,13 +15,13 @@ public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) public static String generate(List> pids, String defaultID) { - if (pids == null || pids.size() == 0) + if (pids == null || pids.isEmpty()) return defaultID; Identifier bp = pids .stream() .min(Identifier::compareTo) - .get(); + .orElseThrow(() -> new IllegalStateException("unable to generate id")); String prefix = substringBefore(bp.getOriginalID(), "|"); String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java index 1e13485e5..c9c9dd8fe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkBlockStats.java @@ -9,7 +9,6 @@ import 
org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -63,7 +63,7 @@ public class SparkBlockStats extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java index 0aaa1e662..93027e99a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyOpenorgsSimRels.java @@ -6,7 +6,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -17,8 +16,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -78,7 +75,7 @@ public class SparkCopyOpenorgsSimRels extends AbstractSparkAction { saveParquet(rawRels, outputPath, SaveMode.Append); - log.info("Copied " + rawRels.count() + " Similarity Relations"); + log.info("Copied {} Similarity Relations", rawRels.count()); } private boolean filterOpenorgsRels(Relation rel) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java index 9ece43891..bf0b7f687 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCopyRelationsNoOpenorgs.java @@ -2,34 +2,21 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; -import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; 
-import org.apache.spark.api.java.function.PairFunction; -import org.apache.spark.sql.*; -import org.apache.spark.sql.Dataset; -import org.dom4j.DocumentException; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; public class SparkCopyRelationsNoOpenorgs extends AbstractSparkAction { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java index b41507e95..6989ec54b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateDedupRecord.java @@ -13,6 +13,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.EntityType; @@ -54,7 +55,7 @@ public class SparkCreateDedupRecord extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String isLookUpUrl = parser.get("isLookUpUrl"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index bfc605039..95e3dff28 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -24,6 +24,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.hash.Hashing; @@ -76,7 +77,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws ISLookUpException, DocumentException, IOException { + throws ISLookUpException, DocumentException, IOException, SAXException { final String graphBasePath = parser.get("graphBasePath"); final String workingPath = parser.get("workingPath"); @@ -161,11 +162,11 @@ public class SparkCreateMergeRels extends AbstractSparkAction { private ConnectedComponent generateID(String key, Iterator> values) { - List> identifiers = Lists.newArrayList(values).stream().map(v -> { - T entity = v._2(); - Identifier identifier = Identifier.newInstance(entity); - return identifier; - }).collect(Collectors.toList()); + List> identifiers = Lists + .newArrayList(values) + .stream() + .map(v -> 
Identifier.newInstance(v._2())) + .collect(Collectors.toList()); String rootID = IdGenerator.generate(identifiers, key); @@ -235,7 +236,6 @@ public class SparkCreateMergeRels extends AbstractSparkAction { info.setProvenanceaction(provenanceAction); // TODO calculate the trust value based on the similarity score of the elements in the CC - // info.setTrust(); r.setDataInfo(info); return r; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java index df3db7add..8e5e9fd69 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateOrgsDedupRecord.java @@ -18,9 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java index 884967364..f89f634b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateSimRels.java @@ -7,7 +7,6 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -17,6 +16,7 @@ import org.apache.spark.sql.SparkSession; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.Block; @@ -26,8 +26,6 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.model.FieldListImpl; -import eu.dnetlib.pace.model.FieldValueImpl; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; @@ -56,7 +54,7 @@ public class SparkCreateSimRels extends AbstractSparkAction { @Override public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException { + throws DocumentException, IOException, ISLookUpException, SAXException { // read oozie parameters final String graphBasePath = parser.get("graphBasePath"); @@ -110,9 +108,6 @@ public class SparkCreateSimRels extends AbstractSparkAction { Encoders.bean(Relation.class)); saveParquet(simRels, outputPath, SaveMode.Overwrite); - - log.info("Generated " + simRels.count() + " Similarity Relations"); - } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java index 657d5a832..d12048b02 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareNewOrgs.java @@ -6,10 +6,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.commons.io.IOUtils; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.FilterFunction; @@ -23,9 +19,9 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Organization; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -84,8 +80,9 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); + final String organizazion = ModelSupport.getMainType(EntityType.organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organizazion); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organizazion); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset newOrgs = createNewOrgs(spark, mergeRelPath, relationPath, entityPath); @@ -115,7 +112,7 @@ public class SparkPrepareNewOrgs extends AbstractSparkAction { .textFile(relationPath) .map(patchRelFn(), Encoders.bean(Relation.class)) .toJavaRDD() - .filter(r -> filterRels(r, "organization")) + .filter(r -> filterRels(r, ModelSupport.getMainType(EntityType.organization))) // take the worst id of the diffrel: .mapToPair(rel -> { if (DedupUtility.compareOpenOrgIds(rel.getSource(), rel.getTarget()) > 0) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java index 08b39793e..61325ab50 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPrepareOrgRels.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.dedup.model.OrgSimRel; +import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -32,6 +33,7 @@ import scala.Tuple3; public class SparkPrepareOrgRels extends AbstractSparkAction { private static final Logger log = LoggerFactory.getLogger(SparkPrepareOrgRels.class); + public static final String GROUP_PREFIX = "group::"; public 
SparkPrepareOrgRels(ArgumentApplicationParser parser, SparkSession spark) { super(parser, spark); @@ -41,7 +43,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser.parseArgument(args); @@ -81,8 +83,9 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { log.info("table: '{}'", dbTable); log.info("dbPwd: '{}'", "xxx"); - final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, "organization"); - final String entityPath = DedupUtility.createEntityPath(graphBasePath, "organization"); + final String organization = ModelSupport.getMainType(EntityType.organization); + final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, organization); + final String entityPath = DedupUtility.createEntityPath(graphBasePath, organization); final String relationPath = DedupUtility.createEntityPath(graphBasePath, "relation"); Dataset relations = createRelations(spark, mergeRelPath, relationPath, entityPath); @@ -168,7 +171,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .map(g -> Lists.newArrayList(g._2())) .filter(l -> l.size() > 1) .flatMap(l -> { - String groupId = "group::" + UUID.randomUUID(); + String groupId = GROUP_PREFIX + UUID.randomUUID(); List ids = sortIds(l); // sort IDs by type List, String>> rels = new ArrayList<>(); String source = ids.get(0); @@ -192,7 +195,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { .collect(Collectors.toList()))) // : take only relations with only the group_id, it // means they are correct. If the diffRel is present the relation has to be removed - .filter(g -> g._2().size() == 1 && g._2().get(0).contains("group::")) + .filter(g -> g._2().size() == 1 && g._2().get(0).contains(GROUP_PREFIX)) .map( t -> new Tuple3<>( t._1().split("@@@")[0], @@ -255,7 +258,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { // Sort IDs basing on the type. Priority: 1) openorgs, 2)corda, 3)alphabetic public static List sortIds(List ids) { - ids.sort((o1, o2) -> DedupUtility.compareOpenOrgIds(o1, o2)); + ids.sort(DedupUtility::compareOpenOrgIds); return ids; } @@ -289,9 +292,10 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { List> rels = new ArrayList<>(); for (String id1 : g._2()) { for (String id2 : g._2()) { - if (!id1.equals(id2)) - if (id1.contains(DedupUtility.OPENORGS_ID_PREFIX) && !id2.contains("openorgsmesh")) - rels.add(new Tuple2<>(id1, id2)); + if (!id1.equals(id2) && id1.contains(DedupUtility.OPENORGS_ID_PREFIX) + && !id2.contains("openorgsmesh")) { + rels.add(new Tuple2<>(id1, id2)); + } } } return rels.iterator(); @@ -310,7 +314,7 @@ public class SparkPrepareOrgRels extends AbstractSparkAction { r._2()._2().getCountry() != null ? r._2()._2().getCountry().getClassid() : "", r._2()._2().getWebsiteurl() != null ? 
r._2()._2().getWebsiteurl().getValue() : "", r._2()._2().getCollectedfrom().get(0).getValue(), - "group::" + r._1()._1(), + GROUP_PREFIX + r._1()._1(), structuredPropertyListToString(r._2()._2().getPid()), parseECField(r._2()._2().getEclegalbody()), parseECField(r._2()._2().getEclegalperson()), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index 220b0f483..0fa41bd6d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -40,7 +40,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPropagateRelation.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); @@ -113,7 +113,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { .join(r.getSource(), r.getTarget(), r.getRelType(), r.getSubRelType(), r.getRelClass()), Encoders.STRING()) .agg(new RelationAggregator().toColumn()) - .map((MapFunction, Relation>) t -> t._2(), Encoders.bean(Relation.class)); + .map((MapFunction, Relation>) Tuple2::_2, Encoders.bean(Relation.class)); } // redirect the relations to the dedupID @@ -163,7 +163,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { private FilterFunction getRelationFilterFunction() { return r -> StringUtils.isNotBlank(r.getSource()) || StringUtils.isNotBlank(r.getTarget()) || - StringUtils.isNotBlank(r.getRelClass()) || + StringUtils.isNotBlank(r.getRelType()) || StringUtils.isNotBlank(r.getSubRelType()) || StringUtils.isNotBlank(r.getRelClass()); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java index fdef7f77d..49021ab58 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkUpdateEntity.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; +import java.util.Map; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; @@ -13,7 +14,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -26,8 +26,10 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.util.MapDocumentUtil; @@ -72,83 
+74,76 @@ public class SparkUpdateEntity extends AbstractSparkAction { final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - // for each entity - ModelSupport.entityTypes - .forEach( - (type, clazz) -> { - final String outputPath = dedupGraphPath + "/" + type; - removeOutputDir(spark, outputPath); - final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); - if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { - JavaRDD sourceEntity = sc - .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); + for (Map.Entry e : ModelSupport.entityTypes.entrySet()) { + final EntityType type = e.getKey(); + final Class clazz = e.getValue(); + final String outputPath = dedupGraphPath + "/" + type; + removeOutputDir(spark, outputPath); + final String ip = DedupUtility.createEntityPath(graphBasePath, type.toString()); + if (HdfsSupport.exists(ip, sc.hadoopConfiguration())) { + JavaRDD sourceEntity = sc + .textFile(DedupUtility.createEntityPath(graphBasePath, type.toString())); - if (mergeRelExists(workingPath, type.toString())) { + if (mergeRelExists(workingPath, type.toString())) { - final String mergeRelPath = DedupUtility - .createMergeRelPath(workingPath, "*", type.toString()); - final String dedupRecordPath = DedupUtility - .createDedupRecordPath(workingPath, "*", type.toString()); + final String mergeRelPath = DedupUtility + .createMergeRelPath(workingPath, "*", type.toString()); + final String dedupRecordPath = DedupUtility + .createDedupRecordPath(workingPath, "*", type.toString()); - final Dataset rel = spark - .read() - .load(mergeRelPath) - .as(Encoders.bean(Relation.class)); + final Dataset rel = spark + .read() + .load(mergeRelPath) + .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = rel - .where("relClass == 'merges'") - .where("source != target") - .select(rel.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); + final JavaPairRDD mergedIds = rel + .where("relClass == 'merges'") + .where("source != target") + .select(rel.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2<>(r.getString(0), "d")); - JavaPairRDD entitiesWithId = sourceEntity - .mapToPair( - (PairFunction) s -> new Tuple2<>( - MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); - if (type == EntityType.organization) // exclude root records from organizations - entitiesWithId = excludeRootOrgs(entitiesWithId, rel); + JavaPairRDD entitiesWithId = sourceEntity + .mapToPair( + (PairFunction) s -> new Tuple2<>( + MapDocumentUtil.getJPathString(IDJSONPATH, s), s)); + if (type == EntityType.organization) // exclude root records from organizations + entitiesWithId = excludeRootOrgs(entitiesWithId, rel); - JavaRDD map = entitiesWithId - .leftOuterJoin(mergedIds) - .map(k -> { - if (k._2()._2().isPresent()) { - return updateDeletedByInference(k._2()._1(), clazz); - } - return k._2()._1(); - }); + JavaRDD map = entitiesWithId + .leftOuterJoin(mergedIds) + .map(k -> { + if (k._2()._2().isPresent()) { + return updateDeletedByInference(k._2()._1(), clazz); + } + return k._2()._1(); + }); - sourceEntity = map.union(sc.textFile(dedupRecordPath)); - - } - - sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); - } - }); + sourceEntity = map.union(sc.textFile(dedupRecordPath)); + } + sourceEntity.saveAsTextFile(outputPath, GzipCodec.class); + } + } } - public boolean mergeRelExists(String basePath, String entity) { + public boolean 
mergeRelExists(String basePath, String entity) throws IOException { boolean result = false; - try { - FileSystem fileSystem = FileSystem.get(new Configuration()); - FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); + FileSystem fileSystem = FileSystem.get(new Configuration()); + FileStatus[] fileStatuses = fileSystem.listStatus(new Path(basePath)); - for (FileStatus fs : fileStatuses) { - if (fs.isDirectory()) - if (fileSystem - .exists( - new Path(DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)))) - result = true; + for (FileStatus fs : fileStatuses) { + final Path mergeRelPath = new Path( + DedupUtility.createMergeRelPath(basePath, fs.getPath().getName(), entity)); + if (fs.isDirectory() && fileSystem.exists(mergeRelPath)) { + result = true; } - - return result; - } catch (IOException e) { - throw new RuntimeException(e); } + + return result; } private static String updateDeletedByInference( diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java index 3a986a9dd..3e564052e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java @@ -3,29 +3,15 @@ package eu.dnetlib.dhp.oa.dedup.graph; import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; -import org.apache.spark.api.java.function.MapFunction; import org.codehaus.jackson.annotate.JsonIgnore; -import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; -import eu.dnetlib.dhp.oa.dedup.DedupUtility; -import eu.dnetlib.dhp.oa.dedup.IdGenerator; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; -import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.PaceException; public class ConnectedComponent implements Serializable { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index e821d7ef5..a25a853ef 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -19,7 +19,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; import eu.dnetlib.dhp.schema.oaf.utils.PidType; -public class Identifier implements Serializable, Comparable { +public class Identifier implements Serializable, Comparable> { public static final String DATE_FORMAT = "yyyy-MM-dd"; public static final String BASE_DATE = "2000-01-01"; @@ -29,8 +29,8 @@ public class Identifier implements Serializable, Comparable // cached date value private Date date = null; - public static Identifier newInstance(T entity) { - return new Identifier(entity); + public 
static Identifier newInstance(T entity) { + return new Identifier<>(entity); } public Identifier(T entity) { @@ -88,7 +88,7 @@ public class Identifier implements Serializable, Comparable } @Override - public int compareTo(Identifier i) { + public int compareTo(Identifier i) { // priority in comparisons: 1) pidtype, 2) collectedfrom (depending on the entity type) , 3) date 4) // alphabetical order of the originalID diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java index 7c58c375a..daea29a07 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DatePickerTest.java @@ -10,12 +10,12 @@ import org.junit.jupiter.api.Test; import com.clearspring.analytics.util.Lists; -public class DatePickerTest { +class DatePickerTest { Collection dates = Lists.newArrayList(); @Test - public void testPickISO() { + void testPickISO() { dates.add("2016-01-01T12:00:00Z"); dates.add("2016-06-16T12:00:00Z"); dates.add("2020-01-01T12:00:00Z"); @@ -24,7 +24,7 @@ public class DatePickerTest { } @Test - public void testPickSimple() { + void testPickSimple() { dates.add("2016-01-01"); dates.add("2016-06-16"); dates.add("2020-01-01"); @@ -33,7 +33,7 @@ public class DatePickerTest { } @Test - public void testPickFrequent() { + void testPickFrequent() { dates.add("2016-02-01"); dates.add("2016-02-01"); dates.add("2016-02-01"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java index 80154fbb7..e86f91f99 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/EntityMergerTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -public class EntityMergerTest implements Serializable { +class EntityMergerTest implements Serializable { private List> publications; private List> publications2; @@ -54,7 +54,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void softwareMergerTest() throws InstantiationException, IllegalAccessException { + void softwareMergerTest() throws InstantiationException, IllegalAccessException { List> softwares = readSample( testEntityBasePath + "/software_merge.json", Software.class); @@ -69,7 +69,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest() throws InstantiationException, IllegalAccessException { + void publicationMergerTest() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications.iterator(), 0, dataInfo, Publication.class); @@ -125,7 +125,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest2() throws InstantiationException, IllegalAccessException { + void publicationMergerTest2() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications2.iterator(), 0, dataInfo, Publication.class); @@ -137,7 +137,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void 
publicationMergerTest3() throws InstantiationException, IllegalAccessException { + void publicationMergerTest3() throws InstantiationException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications3.iterator(), 0, dataInfo, Publication.class); @@ -147,7 +147,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest4() throws InstantiationException, IllegalStateException, IllegalAccessException { Publication pub_merged = DedupRecordFactory .entityMerger(dedupId, publications4.iterator(), 0, dataInfo, Publication.class); @@ -157,7 +157,7 @@ public class EntityMergerTest implements Serializable { } @Test - public void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { + void publicationMergerTest5() throws InstantiationException, IllegalStateException, IllegalAccessException { System.out .println( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java index 1a279fac7..2d6637882 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/IdGeneratorTest.java @@ -56,7 +56,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest1() { + void generateIdTest1() { String id1 = IdGenerator.generate(bestIds, "50|defaultID"); System.out @@ -66,7 +66,7 @@ public class IdGeneratorTest { } @Test - public void generateIdTest2() { + void generateIdTest2() { String id1 = IdGenerator.generate(bestIds2, "50|defaultID"); String id2 = IdGenerator.generate(bestIds3, "50|defaultID"); @@ -82,7 +82,7 @@ public class IdGeneratorTest { } @Test - public void generateIdOrganizationTest() { + void generateIdOrganizationTest() { String id1 = IdGenerator.generate(bestIdsOrg, "20|defaultID"); assertEquals("20|openorgs____::599c15a70fcb03be6ba08f75f14d6076", id1); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index bf4913056..1860a3ff0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -148,7 +147,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -203,7 +202,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(2) - public void cutMergeRelsTest() throws Exception { + void cutMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -299,7 +298,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void 
createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -355,7 +354,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(4) - public void createDedupRecordTest() throws Exception { + void createDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -402,7 +401,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(5) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -518,7 +517,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(6) - public void propagateRelationTest() throws Exception { + void propagateRelationTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -568,7 +567,7 @@ public class SparkDedupTest implements Serializable { @Test @Order(7) - public void testRelations() throws Exception { + void testRelations() throws Exception { testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 97cfab118..9312d83b1 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -67,7 +67,7 @@ public class SparkOpenorgsDedupTest implements Serializable { public static void cleanUp() throws IOException, URISyntaxException { testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) + .get(SparkOpenorgsDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/openorgs/dedup").toURI()) .toFile() .getAbsolutePath(); testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") @@ -101,7 +101,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator_openorgs.xml"))); @@ -110,14 +110,14 @@ public class SparkOpenorgsDedupTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkOpenorgsDedupTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); } @Test @Order(1) - public void createSimRelsTest() throws Exception { + void createSimRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -148,7 +148,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(2) - public void copyOpenorgsSimRels() throws Exception { + void copyOpenorgsSimRels() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( @@ -177,7 +177,7 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(3) - public void createMergeRelsTest() throws Exception { + void createMergeRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -230,11 +230,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(4) - public void 
prepareOrgRelsTest() throws Exception { + void prepareOrgRelsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareOrgRels.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareOrgRels_parameters.json"))); parser @@ -313,11 +313,11 @@ public class SparkOpenorgsDedupTest implements Serializable { @Test @Order(5) - public void prepareNewOrgsTest() throws Exception { + void prepareNewOrgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkPrepareNewOrgs.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/prepareNewOrgs_parameters.json"))); parser diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java index 606dd9e5b..2349ffebe 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsProvisionTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.dedup; import static java.nio.file.Files.createTempDirectory; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.lenient; import java.io.File; @@ -12,8 +11,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; -import java.util.Collections; -import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -32,9 +29,6 @@ import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -110,7 +104,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(1) - public void copyOpenorgsMergeRelTest() throws Exception { + void copyOpenorgsMergeRelTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -143,7 +137,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(2) - public void createOrgsDedupRecordTest() throws Exception { + void createOrgsDedupRecordTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -176,7 +170,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(3) - public void updateEntityTest() throws Exception { + void updateEntityTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -216,7 +210,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(4) - public void copyRelationsNoOpenorgsTest() throws Exception { + void copyRelationsNoOpenorgsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -239,7 +233,7 @@ public class SparkOpenorgsProvisionTest implements Serializable { @Test @Order(5) - public void propagateRelationsTest() throws Exception { + void propagateRelationsTest() throws Exception { 
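The visibility changes running through the dedup test classes above follow JUnit 5 (Jupiter) conventions: test classes and @Test methods may be package-private, so the public modifier adds nothing and static-analysis tools flag it as noise. A minimal sketch of the resulting style, with an illustrative class name that is not taken from the repository:

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

// JUnit 5 discovers this class and method without the public modifier.
class VisibilityConventionTest {

	@Test
	void addsTwoNumbers() {
		assertEquals(4, 2 + 2);
	}
}

The same hunks also point the getResourceAsStream calls at the class the requested resource logically belongs to (for example SparkPrepareOrgRels for prepareOrgRels_parameters.json, SparkPrepareNewOrgs for prepareNewOrgs_parameters.json). Because the paths are absolute ("/eu/dnetlib/..."), any class on the same classpath would resolve them just as well; naming the owning class simply keeps the lookup self-documenting.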
ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 31de8d951..2efbe260a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -124,7 +124,7 @@ public class SparkStatsTest implements Serializable { } @Test - public void createBlockStatsTest() throws Exception { + void createBlockStatsTest() throws Exception { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 1759180d2..7348a3bd2 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.oa.dedup.jpath; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; -public class JsonPathTest { +class JsonPathTest { String json = "{\t\"dataInfo\":{\t\t\"invisible\":false,\t\t\"inferred\":false,\t\t\"deletedbyinference\":false,\t\t\"trust\":\"0.810000002384185791\",\t\t\"inferenceprovenance\":\"\",\t\t\"provenanceaction\":{\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t}\t},\t\"lastupdatetimestamp\":1584960968152,\t\"id\":\"20|corda__h2020::9faf23721249f26ac2c16eb857ea1fb9\",\t\"originalId\":[\t\t\"corda__h2020::927957582\"\t],\t\"collectedfrom\":[\t\t{\t\t\t\"key\":\"openaire____::corda_h2020\",\t\t\t\"value\":\"CORDA - COmmon Research DAta Warehouse - Horizon 2020\",\t\t\t\"dataInfo\":null\t\t}\t],\t\"pid\":[\t],\t\"dateofcollection\":\"2016-06-05\",\t\"dateoftransformation\":\"2019-11-19\",\t\"extraInfo\":[\t],\t\"oaiprovenance\":null,\t\"legalshortname\":{\t\t\"value\":\"Comentor AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"legalname\":{\t\t\"value\":\"Comentor 
AB\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"alternativeNames\":[\t],\t\"websiteurl\":{\t\t\"value\":\"http://www.comentor.se\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"logourl\":null,\t\"eclegalbody\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"eclegalperson\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnonprofit\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecresearchorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"echighereducation\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganizationeurinterests\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":f
alse,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecinternationalorganization\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecenterprise\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecsmevalidated\":{\t\t\"value\":\"true\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"ecnutscode\":{\t\t\"value\":\"false\",\t\t\"dataInfo\":{\t\t\t\"invisible\":false,\t\t\t\"inferred\":false,\t\t\t\"deletedbyinference\":false,\t\t\t\"trust\":\"0.810000002384185791\",\t\t\t\"inferenceprovenance\":\"\",\t\t\t\"provenanceaction\":{\t\t\t\t\"classid\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"classname\":\"sysimport:crosswalk:entityregistry\",\t\t\t\t\"schemeid\":\"dnet:provenance_actions\",\t\t\t\t\"schemename\":\"dnet:provenance_actions\"\t\t\t}\t\t}\t},\t\"country\":null}"; DedupConfig conf = DedupConfig @@ -283,15 +285,18 @@ public class JsonPathTest { + "}"); @Test - public void testJPath() throws Exception { + void testJPath() { MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(conf, json); + Assertions.assertNotNull(d); + Assertions.assertTrue(StringUtils.isNotBlank(d.getIdentifier())); + System.out.println("d = " + d); } @Test - public void testNull() throws Exception { + void testNull() { final Object p = null; System.out.println((String) p); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java index ee6136b58..c6c207727 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/CrossrefImporter.java @@ -2,6 +2,7 @@ package eu.dnetlib.doiboost.crossref; import java.io.ByteArrayOutputStream; +import java.util.Objects; import java.util.Optional; import java.util.zip.Inflater; @@ -22,9 +23,11 @@ public class CrossrefImporter { final 
ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - CrossrefImporter.class - .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/import_from_es.json"))); + Objects + .requireNonNull( + CrossrefImporter.class + .getResourceAsStream( + "/eu/dnetlib/dhp/doiboost/import_from_es.json")))); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java index dcebbbcac..6d6a4ca78 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ESClient.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.crossref; +import java.io.IOException; import java.util.Iterator; import java.util.List; @@ -11,8 +12,6 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; @@ -57,8 +56,8 @@ public class ESClient implements Iterator { try (CloseableHttpResponse response = client.execute(httpPost)) { return IOUtils.toString(response.getEntity().getContent()); } - } catch (Throwable e) { - throw new RuntimeException("Error on executing request ", e); + } catch (IOException e) { + throw new IllegalStateException("Error on executing request ", e); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java index feb540fcd..f725b3222 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ActivitiesDecompressor.java @@ -29,8 +29,10 @@ public class ActivitiesDecompressor { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; - public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) - throws Exception { + private ActivitiesDecompressor() { + } + + public static void parseGzActivities(Configuration conf, String inputUri, Path outputPath) throws IOException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); @@ -44,7 +46,7 @@ public class ActivitiesDecompressor { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,8 +54,7 @@ public class ActivitiesDecompressor { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int doiFound = 0; int errorFromOrcidFound = 0; @@ -79,11 +80,11 @@ public class ActivitiesDecompressor { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != 
null) { - buffer.append(line); + builder.append(line); } - WorkData workData = XMLRecordParser.VTDParseWorkData(buffer.toString().getBytes()); + WorkData workData = XMLRecordParser.VTDParseWorkData(builder.toString().getBytes()); if (workData != null) { if (workData.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -113,7 +114,7 @@ public class ActivitiesDecompressor { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } @@ -177,11 +178,11 @@ public class ActivitiesDecompressor { counter++; BufferedReader br = new BufferedReader(new InputStreamReader(tais)); String line; - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } - String xml = buffer.toString(); + String xml = builder.toString(); String[] filenameParts = filename.split("/"); final Text key = new Text( XMLRecordParser diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java index 4de4a0266..99587b16a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLActivitiesData.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -10,7 +11,6 @@ import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; public class ExtractXMLActivitiesData extends OrcidDSManager { private String outputWorksPath; @@ -22,11 +22,11 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { extractXMLActivitiesData.extractWorks(); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLActivitiesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_works-no-doi_from_activities.json"))); parser.parseArgument(args); @@ -43,7 +43,6 @@ public class ExtractXMLActivitiesData extends OrcidDSManager { private void extractWorks() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java index 5c2a35229..4121f3391 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/ExtractXMLSummariesData.java @@ -5,12 +5,13 @@ import java.io.IOException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.doiboost.orcidnodoi.GenOrcidAuthorWork; +import eu.dnetlib.dhp.parser.utility.VtdException; public class ExtractXMLSummariesData extends OrcidDSManager { @@ -27,7 +28,7 @@ public class ExtractXMLSummariesData extends OrcidDSManager { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - GenOrcidAuthorWork.class + ExtractXMLSummariesData.class .getResourceAsStream( "/eu/dnetlib/dhp/doiboost/gen_orcid_authors_from_summaries.json"))); parser.parseArgument(args); @@ -42,9 +43,8 @@ public class ExtractXMLSummariesData extends OrcidDSManager { Log.info("Output Authors Data: " + outputAuthorsPath); } - public void extractAuthors() throws Exception { + public void extractAuthors() throws IOException, VtdException, ParseException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java index 3b4033450..7e4869fdd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidAuthorsDOIsDataGen.java @@ -22,9 +22,8 @@ public class OrcidAuthorsDOIsDataGen extends OrcidDSManager { orcidAuthorsDOIsDataGen.generateAuthorsDOIsData(); } - public void generateAuthorsDOIsData() throws Exception { + public void generateAuthorsDOIsData() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputAuthorsDOIsPath)); ActivitiesDecompressor.parseGzActivities(conf, tarGzUri, outputPath); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java index 73a4bfd05..0b4ef279d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/OrcidDSManager.java @@ -25,9 +25,8 @@ public class OrcidDSManager { orcidDSManager.generateAuthors(); } - public void generateAuthors() throws Exception { + public void generateAuthors() throws IOException { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(summariesFileNameTarGz); Path outputPath = new Path( hdfsServerUri diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8cf070213..2b8e42bf6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -3,6 +3,7 @@ package eu.dnetlib.doiboost.orcid; import static 
eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.FileNotFoundException; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -64,7 +65,7 @@ public class SparkDownloadOrcidAuthors { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); logger.info("lastUpdate: {}", lastUpdate); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -82,7 +83,7 @@ public class SparkDownloadOrcidAuthors { JavaPairRDD lamdaFileRDD = sc .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); - logger.info("Data retrieved: " + lamdaFileRDDCount); + logger.info("Data retrieved: {}", lamdaFileRDDCount); Function, Boolean> isModifiedAfterFilter = data -> { String orcidId = data._1().toString(); @@ -95,7 +96,7 @@ public class SparkDownloadOrcidAuthors { return false; }; - Function, Tuple2> downloadRecordFunction = data -> { + Function, Tuple2> downloadRecordFn = data -> { String orcidId = data._1().toString(); String lastModifiedDate = data._2().toString(); final DownloadedRecordData downloaded = new DownloadedRecordData(); @@ -118,14 +119,19 @@ public class SparkDownloadOrcidAuthors { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); } @@ -145,33 +151,41 @@ public class SparkDownloadOrcidAuthors { logger.info("Start execution ..."); JavaPairRDD authorsModifiedRDD = lamdaFileRDD.filter(isModifiedAfterFilter); long authorsModifiedCount = authorsModifiedRDD.count(); - logger.info("Authors modified count: " + authorsModifiedCount); + logger.info("Authors modified count: {}", authorsModifiedCount); logger.info("Start downloading ..."); - authorsModifiedRDD - .repartition(100) - .map(downloadRecordFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) - .saveAsNewAPIHadoopFile( - workingPath.concat(outputPath), - Text.class, - Text.class, - SequenceFileOutputFormat.class, - sc.hadoopConfiguration()); - logger.info("parsedRecordsAcc: " + parsedRecordsAcc.value().toString()); - logger.info("modifiedRecordsAcc: " + modifiedRecordsAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP404Acc: " + errorHTTP404Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + final JavaPairRDD pairRDD = authorsModifiedRDD + .repartition(100) + .map(downloadRecordFn) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); + + saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); + + logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); + logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + 
logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } + private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, + JavaPairRDD pairRDD) { + pairRDD + .saveAsNewAPIHadoopFile( + workingPath.concat(outputPath), + Text.class, + Text.class, + SequenceFileOutputFormat.class, + sc.hadoopConfiguration()); + } + public static boolean isModified(String orcidId, String modifiedDate, String lastUpdate) { Date modifiedDateDt; Date lastUpdateDt; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 59de7ca80..cab538783 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,8 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; -import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -13,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; @@ -61,7 +58,7 @@ public class SparkDownloadOrcidWorks { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputPath = parser.get("outputPath"); final String token = parser.get("token"); final String hdfsServerUri = parser.get("hdfsServerUri"); @@ -169,20 +166,25 @@ public class SparkDownloadOrcidWorks { switch (statusCode) { case 403: errorHTTP403Acc.add(1); + break; case 404: errorHTTP404Acc.add(1); + break; case 409: errorHTTP409Acc.add(1); + break; case 503: errorHTTP503Acc.add(1); + break; case 525: errorHTTP525Acc.add(1); + break; default: errorHTTPGenericAcc.add(1); logger .info( - "Downloading " + orcidId + " status code: " - + response.getStatusLine().getStatusCode()); + "Downloading {} status code: {}", orcidId, + response.getStatusLine().getStatusCode()); } return downloaded.toTuple2(); } @@ -199,24 +201,24 @@ public class SparkDownloadOrcidWorks { .flatMap(retrieveWorkUrlFunction) .repartition(100) .map(downloadWorkFunction) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsTextFile(workingPath.concat(outputPath), GzipCodec.class); - logger.info("updatedAuthorsAcc: " + updatedAuthorsAcc.value().toString()); - logger.info("parsedAuthorsAcc: " + parsedAuthorsAcc.value().toString()); - logger.info("parsedWorksAcc: " + parsedWorksAcc.value().toString()); - 
logger.info("modifiedWorksAcc: " + modifiedWorksAcc.value().toString()); - logger.info("maxModifiedWorksLimitAcc: " + maxModifiedWorksLimitAcc.value().toString()); - logger.info("errorCodeFoundAcc: " + errorCodeFoundAcc.value().toString()); - logger.info("errorLoadingJsonFoundAcc: " + errorLoadingJsonFoundAcc.value().toString()); - logger.info("errorLoadingXMLFoundAcc: " + errorLoadingXMLFoundAcc.value().toString()); - logger.info("errorParsingXMLFoundAcc: " + errorParsingXMLFoundAcc.value().toString()); - logger.info("downloadedRecordsAcc: " + downloadedRecordsAcc.value().toString()); - logger.info("errorHTTP403Acc: " + errorHTTP403Acc.value().toString()); - logger.info("errorHTTP409Acc: " + errorHTTP409Acc.value().toString()); - logger.info("errorHTTP503Acc: " + errorHTTP503Acc.value().toString()); - logger.info("errorHTTP525Acc: " + errorHTTP525Acc.value().toString()); - logger.info("errorHTTPGenericAcc: " + errorHTTPGenericAcc.value().toString()); + logger.info("updatedAuthorsAcc: {}", updatedAuthorsAcc.value()); + logger.info("parsedAuthorsAcc: {}", parsedAuthorsAcc.value()); + logger.info("parsedWorksAcc: {}", parsedWorksAcc.value()); + logger.info("modifiedWorksAcc: {}", modifiedWorksAcc.value()); + logger.info("maxModifiedWorksLimitAcc: {}", maxModifiedWorksLimitAcc.value()); + logger.info("errorCodeFoundAcc: {}", errorCodeFoundAcc.value()); + logger.info("errorLoadingJsonFoundAcc: {}", errorLoadingJsonFoundAcc.value()); + logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); + logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); + logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); + logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); + logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); + logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); + logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); + logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java index 178d07608..d2fed61ec 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenLastModifiedSeq.java @@ -3,7 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.*; +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URI; import java.util.Arrays; import java.util.List; @@ -15,7 +16,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; @@ -28,10 +28,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; public class SparkGenLastModifiedSeq { - private static String hdfsServerUri; - private static String workingPath; - private static String outputPath; - private static String lambdaFileName; public static void main(String[] args) throws Exception { final ArgumentApplicationParser parser = new 
ArgumentApplicationParser( @@ -45,12 +41,14 @@ public class SparkGenLastModifiedSeq { .ofNullable(parser.get("isSparkSessionManaged")) .map(Boolean::valueOf) .orElse(Boolean.TRUE); - hdfsServerUri = parser.get("hdfsServerUri"); - workingPath = parser.get("workingPath"); - outputPath = parser.get("outputPath"); - lambdaFileName = parser.get("lambdaFileName"); - String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); - String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; + + final String hdfsServerUri = parser.get("hdfsServerUri"); + final String workingPath = parser.get("workingPath"); + final String outputPath = parser.get("outputPath"); + final String lambdaFileName = parser.get("lambdaFileName"); + + final String lambdaFileUri = hdfsServerUri.concat(workingPath).concat(lambdaFileName); + final String lastModifiedDateFromLambdaFileUri = "last_modified_date_from_lambda_file.txt"; SparkConf sparkConf = new SparkConf(); runWithSparkSession( @@ -64,7 +62,7 @@ public class SparkGenLastModifiedSeq { .concat(workingPath) .concat(outputPath)); Path hdfsreadpath = new Path(lambdaFileUri); - Configuration conf = new Configuration(); + Configuration conf = spark.sparkContext().hadoopConfiguration(); conf.set("fs.defaultFS", hdfsServerUri.concat(workingPath)); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java index 7d9f39d05..3ecb4f5e3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkGenerateDoiAuthorList.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -53,13 +52,13 @@ public class SparkGenerateDoiAuthorList { .orElse(Boolean.TRUE); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); final String workingPath = parser.get("workingPath"); - logger.info("workingPath: ", workingPath); + logger.info("workingPath: {}", workingPath); final String outputDoiAuthorListPath = parser.get("outputDoiAuthorListPath"); - logger.info("outputDoiAuthorListPath: ", outputDoiAuthorListPath); + logger.info("outputDoiAuthorListPath: {}", outputDoiAuthorListPath); final String authorsPath = parser.get("authorsPath"); - logger.info("authorsPath: ", authorsPath); + logger.info("authorsPath: {}", authorsPath); final String xmlWorksPath = parser.get("xmlWorksPath"); - logger.info("xmlWorksPath: ", xmlWorksPath); + logger.info("xmlWorksPath: {}", xmlWorksPath); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -128,8 +127,7 @@ public class SparkGenerateDoiAuthorList { .concat( d1.stream(), d2.stream()); - List mergedAuthors = mergedStream.collect(Collectors.toList()); - return mergedAuthors; + return mergedStream.collect(Collectors.toList()); } if (d1 != null) { return d1; @@ -170,14 +168,6 @@ public class SparkGenerateDoiAuthorList { return authorData; } - private static WorkData loadWorkFromJson(Text orcidId, Text json) { - WorkData workData = new WorkData(); - 
workData.setOid(orcidId.toString()); - JsonElement jElement = new JsonParser().parse(json.toString()); - workData.setDoi(getJsonValue(jElement, "doi")); - return workData; - } - private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { JsonElement name = null; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java index 51326c610..1727f1825 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static org.apache.spark.sql.functions.*; -import java.io.IOException; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -17,8 +16,10 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,11 +36,12 @@ import scala.Tuple2; public class SparkUpdateOrcidAuthors { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidAuthors.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +55,6 @@ public class SparkUpdateOrcidAuthors { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -87,7 +88,7 @@ public class SparkUpdateOrcidAuthors { String jsonData = data._2().toString(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); + if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); if (StringUtils.isEmpty(compressedData)) { @@ -135,7 +136,7 @@ public class SparkUpdateOrcidAuthors { .col("authorData.oid") .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), "full_outer") - .map(value -> { + .map((MapFunction, AuthorSummary>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -183,7 +184,8 @@ public class SparkUpdateOrcidAuthors { .groupBy("authorData.oid") .agg(array_max(collect_list("downloadDate"))) .map( - row -> new Tuple2<>(row.get(0).toString(), row.get(1).toString()), + (MapFunction>) row -> new Tuple2<>(row.get(0).toString(), + row.get(1).toString()), Encoders.tuple(Encoders.STRING(), Encoders.STRING())) .toJavaRDD() .collect(); @@ -214,25 +216,24 @@ public class SparkUpdateOrcidAuthors { .dropDuplicates("downloadDate", 
"authorData"); cleanedDS .toJavaRDD() - .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); long cleanedDSCount = cleanedDS.count(); - logger.info("report_oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); - logger.info("report_newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); - logger.info("report_updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); - logger.info("report_errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); - logger.info("report_errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); - logger.info("report_errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - logger.info("report_merged_count: " + mergedCount); - logger.info("report_cleaned_count: " + cleanedDSCount); + logger.info("report_oldAuthorsFoundAcc: {}", oldAuthorsFoundAcc.value()); + logger.info("report_newAuthorsFoundAcc: {}", newAuthorsFoundAcc.value()); + logger.info("report_updatedAuthorsFoundAcc: {}", updatedAuthorsFoundAcc.value()); + logger.info("report_errorCodeFoundAcc: {}", errorCodeAuthorsFoundAcc.value()); + logger.info("report_errorLoadingJsonFoundAcc: {}", errorLoadingAuthorsJsonFoundAcc.value()); + logger.info("report_errorParsingXMLFoundAcc: {}", errorParsingAuthorsXMLFoundAcc.value()); + logger.info("report_merged_count: {}", mergedCount); + logger.info("report_cleaned_count: {}", cleanedDSCount); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java index fa17e97e3..aad202ff6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidDatasets.java @@ -3,17 +3,16 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -26,20 +25,19 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.orcid.AuthorSummary; import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; import scala.Tuple2; public class SparkUpdateOrcidDatasets { + public static final 
Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidDatasets.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -53,7 +51,6 @@ public class SparkUpdateOrcidDatasets { .map(Boolean::valueOf) .orElse(Boolean.TRUE); final String workingPath = parser.get("workingPath"); -// final String outputPath = parser.get("outputPath"); SparkConf conf = new SparkConf(); runWithSparkSession( @@ -62,25 +59,6 @@ public class SparkUpdateOrcidDatasets { spark -> { JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - LongAccumulator oldAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("old_authors_found"); - LongAccumulator updatedAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("updated_authors_found"); - LongAccumulator newAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("new_authors_found"); - LongAccumulator errorCodeAuthorsFoundAcc = spark - .sparkContext() - .longAccumulator("error_code_authors_found"); - LongAccumulator errorLoadingAuthorsJsonFoundAcc = spark - .sparkContext() - .longAccumulator("error_loading_authors_json_found"); - LongAccumulator errorParsingAuthorsXMLFoundAcc = spark - .sparkContext() - .longAccumulator("error_parsing_authors_xml_found"); - LongAccumulator oldWorksFoundAcc = spark .sparkContext() .longAccumulator("old_works_found"); @@ -100,125 +78,11 @@ public class SparkUpdateOrcidDatasets { .sparkContext() .longAccumulator("error_parsing_works_xml_found"); -// JavaPairRDD xmlSummariesRDD = sc -// .sequenceFile(workingPath.concat("xml/authors/xml_authors.seq"), Text.class, Text.class); -// xmlSummariesRDD -// .map(seq -> { -// AuthorSummary authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(seq._2().toString().getBytes()); -// authorSummary -// .setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return authorSummary; -// }) -// .filter(authorSummary -> authorSummary != null) -// .map(authorSummary -> JsonWriter.create(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/authors"), GzipCodec.class); -// -// JavaPairRDD xmlWorksRDD = sc -// .sequenceFile(workingPath.concat("xml/works/*"), Text.class, Text.class); -// -// xmlWorksRDD -// .map(seq -> { -// WorkDetail workDetail = XMLRecordParserNoDoi.VTDParseWorkData(seq._2().toString().getBytes()); -// Work work = new Work(); -// work.setWorkDetail(workDetail); -// work.setBase64CompressData(ArgumentApplicationParser.compressArgument(seq._2().toString())); -// return work; -// }) -// .filter(work -> work != null) -// .map(work -> JsonWriter.create(work)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/works"), GzipCodec.class); - -// Function, AuthorSummary> retrieveAuthorSummaryFunction = data -> { -// AuthorSummary authorSummary = new AuthorSummary(); -// String orcidId = data._1().toString(); -// String jsonData = data._2().toString(); -// JsonElement jElement = new JsonParser().parse(jsonData); -// String statusCode = getJsonValue(jElement, "statusCode"); -// String downloadDate = getJsonValue(jElement, "lastModifiedDate"); -// if (statusCode.equals("200")) { -// String compressedData = getJsonValue(jElement, "compressedData"); -// if (StringUtils.isEmpty(compressedData)) { -// 
errorLoadingAuthorsJsonFoundAcc.add(1); -// } else { -// String xmlAuthor = ArgumentApplicationParser.decompressValue(compressedData); -// try { -// authorSummary = XMLRecordParser -// .VTDParseAuthorSummary(xmlAuthor.getBytes()); -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// authorSummary.setBase64CompressData(compressedData); -// return authorSummary; -// } catch (Exception e) { -// logger.error("parsing xml " + orcidId + " [" + jsonData + "]", e); -// errorParsingAuthorsXMLFoundAcc.add(1); -// } -// } -// } else { -// authorSummary.setStatusCode(statusCode); -// authorSummary.setDownloadDate("2020-11-18 00:00:05.644768"); -// errorCodeAuthorsFoundAcc.add(1); -// } -// return authorSummary; -// }; -// -// Dataset downloadedAuthorSummaryDS = spark -// .createDataset( -// sc -// .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class) -// .map(retrieveAuthorSummaryFunction) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// Dataset currentAuthorSummaryDS = spark -// .createDataset( -// sc -// .textFile(workingPath.concat("orcid_dataset/authors/*")) -// .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) -// .rdd(), -// Encoders.bean(AuthorSummary.class)); -// currentAuthorSummaryDS -// .joinWith( -// downloadedAuthorSummaryDS, -// currentAuthorSummaryDS -// .col("authorData.oid") -// .equalTo(downloadedAuthorSummaryDS.col("authorData.oid")), -// "full_outer") -// .map(value -> { -// Optional opCurrent = Optional.ofNullable(value._1()); -// Optional opDownloaded = Optional.ofNullable(value._2()); -// if (!opCurrent.isPresent()) { -// newAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// if (!opDownloaded.isPresent()) { -// oldAuthorsFoundAcc.add(1); -// return opCurrent.get(); -// } -// if (opCurrent.isPresent() && opDownloaded.isPresent()) { -// updatedAuthorsFoundAcc.add(1); -// return opDownloaded.get(); -// } -// return null; -// }, -// Encoders.bean(AuthorSummary.class)) -// .filter(Objects::nonNull) -// .toJavaRDD() -// .map(authorSummary -> OBJECT_MAPPER.writeValueAsString(authorSummary)) -// .saveAsTextFile(workingPath.concat("orcid_dataset/new_authors"), GzipCodec.class); -// -// logger.info("oldAuthorsFoundAcc: " + oldAuthorsFoundAcc.value().toString()); -// logger.info("newAuthorsFoundAcc: " + newAuthorsFoundAcc.value().toString()); -// logger.info("updatedAuthorsFoundAcc: " + updatedAuthorsFoundAcc.value().toString()); -// logger.info("errorCodeFoundAcc: " + errorCodeAuthorsFoundAcc.value().toString()); -// logger.info("errorLoadingJsonFoundAcc: " + errorLoadingAuthorsJsonFoundAcc.value().toString()); -// logger.info("errorParsingXMLFoundAcc: " + errorParsingAuthorsXMLFoundAcc.value().toString()); - - Function retrieveWorkFunction = jsonData -> { + final Function retrieveWorkFunction = jsonData -> { Work work = new Work(); JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate("2020-11-18 00:00:05.644768"); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -247,9 +111,7 @@ public class SparkUpdateOrcidDatasets { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), 
Encoders.bean(Work.class)); @@ -271,7 +133,7 @@ public class SparkUpdateOrcidDatasets { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -291,23 +153,22 @@ public class SparkUpdateOrcidDatasets { Encoders.bean(Work.class)) .filter(Objects::nonNull) .toJavaRDD() - .map(work -> OBJECT_MAPPER.writeValueAsString(work)) + .map(OBJECT_MAPPER::writeValueAsString) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); }); } private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java index 5ebbc01ed..64523941d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkUpdateOrcidWorks.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; @@ -29,14 +30,16 @@ import eu.dnetlib.dhp.schema.orcid.Work; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; +import scala.Tuple2; public class SparkUpdateOrcidWorks { + public static final Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); public static void main(String[] args) throws Exception { - Logger logger = LoggerFactory.getLogger(SparkUpdateOrcidWorks.class); final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils @@ -83,7 +86,6 @@ 
public class SparkUpdateOrcidWorks { JsonElement jElement = new JsonParser().parse(jsonData); String statusCode = getJsonValue(jElement, "statusCode"); work.setStatusCode(statusCode); - String downloadDate = getJsonValue(jElement, "lastModifiedDate"); work.setDownloadDate(Long.toString(System.currentTimeMillis())); if (statusCode.equals("200")) { String compressedData = getJsonValue(jElement, "compressedData"); @@ -112,9 +114,7 @@ public class SparkUpdateOrcidWorks { .createDataset( sc .textFile(workingPath + "downloads/updated_works/*") - .map(s -> { - return s.substring(21, s.length() - 1); - }) + .map(s -> s.substring(21, s.length() - 1)) .map(retrieveWorkFunction) .rdd(), Encoders.bean(Work.class)); @@ -136,7 +136,7 @@ public class SparkUpdateOrcidWorks { .col("workDetail.oid") .equalTo(downloadedWorksDS.col("workDetail.oid"))), "full_outer") - .map(value -> { + .map((MapFunction, Work>) value -> { Optional opCurrent = Optional.ofNullable(value._1()); Optional opDownloaded = Optional.ofNullable(value._2()); if (!opCurrent.isPresent()) { @@ -159,12 +159,12 @@ public class SparkUpdateOrcidWorks { .map(work -> OBJECT_MAPPER.writeValueAsString(work)) .saveAsTextFile(workingPath.concat("orcid_dataset/new_works"), GzipCodec.class); - logger.info("oldWorksFoundAcc: " + oldWorksFoundAcc.value().toString()); - logger.info("newWorksFoundAcc: " + newWorksFoundAcc.value().toString()); - logger.info("updatedWorksFoundAcc: " + updatedWorksFoundAcc.value().toString()); - logger.info("errorCodeWorksFoundAcc: " + errorCodeWorksFoundAcc.value().toString()); - logger.info("errorLoadingJsonWorksFoundAcc: " + errorLoadingWorksJsonFoundAcc.value().toString()); - logger.info("errorParsingXMLWorksFoundAcc: " + errorParsingWorksXMLFoundAcc.value().toString()); + logger.info("oldWorksFoundAcc: {}", oldWorksFoundAcc.value()); + logger.info("newWorksFoundAcc: {}", newWorksFoundAcc.value()); + logger.info("updatedWorksFoundAcc: {}", updatedWorksFoundAcc.value()); + logger.info("errorCodeWorksFoundAcc: {}", errorCodeWorksFoundAcc.value()); + logger.info("errorLoadingJsonWorksFoundAcc: {}", errorLoadingWorksJsonFoundAcc.value()); + logger.info("errorParsingXMLWorksFoundAcc: {}", errorParsingWorksXMLFoundAcc.value()); String lastModifiedDateFromLambdaFile = HDFSUtil .readFromTextFile(hdfsServerUri, workingPath, "last_modified_date_from_lambda_file.txt"); @@ -175,8 +175,7 @@ public class SparkUpdateOrcidWorks { private static String getJsonValue(JsonElement jElement, String property) { if (jElement.getAsJsonObject().has(property)) { - JsonElement name = null; - name = jElement.getAsJsonObject().get(property); + JsonElement name = jElement.getAsJsonObject().get(property); if (name != null && !name.isJsonNull()) { return name.getAsString(); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java index c85b5b691..af6def227 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SummariesDecompressor.java @@ -20,6 +20,9 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.mortbay.log.Log; +import com.ximpleware.ParseException; + +import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; 
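The break statements added to the switch blocks above (in SparkDownloadOrcidAuthors, SparkDownloadOrcidWorks and SparkUpdateOrcidWorks) fix a classic fall-through bug: without them a single 403 response would also increment the 404, 409, 503, 525 and generic error accumulators. A self-contained sketch of the corrected pattern, using plain counters in place of Spark LongAccumulators; the class and field names are illustrative, not taken from the codebase:

import java.util.concurrent.atomic.AtomicLong;

public class StatusCodeCounter {

	private final AtomicLong error403 = new AtomicLong();
	private final AtomicLong error404 = new AtomicLong();
	private final AtomicLong errorGeneric = new AtomicLong();

	public void count(int statusCode) {
		switch (statusCode) {
			case 403:
				error403.incrementAndGet();
				break; // without this break, a 403 would fall through and also count as 404 and generic
			case 404:
				error404.incrementAndGet();
				break;
			default:
				errorGeneric.incrementAndGet();
		}
	}
}

The accompanying logging changes replace string concatenation with SLF4J placeholders, e.g. logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); the message string is then only assembled when the INFO level is actually enabled, although the argument expression itself is still evaluated.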
import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; @@ -28,22 +31,23 @@ public class SummariesDecompressor { private static final int MAX_XML_RECORDS_PARSED = -1; - public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) - throws Exception { - String uri = inputUri; - FileSystem fs = FileSystem.get(URI.create(uri), conf); - Path inputPath = new Path(uri); + private SummariesDecompressor() { + } + + public static void parseGzSummaries(Configuration conf, String inputUri, Path outputPath) throws IOException { + FileSystem fs = FileSystem.get(URI.create(inputUri), conf); + Path inputPath = new Path(inputUri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { - System.err.println("No codec found for " + uri); + System.err.println("No codec found for " + inputUri); System.exit(1); } - CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); + CompressionCodecFactory.removeSuffix(inputUri, codec.getDefaultExtension()); InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarSummaries(fs, conf, gzipInputStream, outputPath); + parseTarSummaries(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -52,7 +56,7 @@ public class SummariesDecompressor { } private static void parseTarSummaries( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int nameFound = 0; int surnameFound = 0; @@ -163,7 +167,7 @@ public class SummariesDecompressor { } public static void extractXML(Configuration conf, String inputUri, Path outputPath) - throws Exception { + throws IOException, VtdException, ParseException { String uri = inputUri; FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java index a2342f7b4..9eb73b240 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/json/JsonHelper.java @@ -7,6 +7,9 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; public class JsonHelper { + private JsonHelper() { + } + public static String createOidWork(WorkDetail workData) { return new Gson().toJson(workData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java index e1a913476..bbd2e1f7e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/HDFSUtil.java @@ -2,10 +2,8 @@ package eu.dnetlib.doiboost.orcid.util; import java.io.*; -import java.net.URI; import java.nio.charset.StandardCharsets; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,42 +12,39 @@ import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - -import eu.dnetlib.doiboost.orcid.SparkDownloadOrcidAuthors; - public class 
HDFSUtil { static Logger logger = LoggerFactory.getLogger(HDFSUtil.class); + private HDFSUtil() { + } + private static FileSystem getFileSystem(String hdfsServerUri) throws IOException { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsServerUri); - FileSystem fileSystem = FileSystem.get(conf); - return fileSystem; + return FileSystem.get(conf); } public static String readFromTextFile(String hdfsServerUri, String workingPath, String path) throws IOException { FileSystem fileSystem = getFileSystem(hdfsServerUri); Path toReadPath = new Path(workingPath.concat(path)); if (!fileSystem.exists(toReadPath)) { - throw new RuntimeException("File not exist: " + path); + throw new IOException("File not exist: " + path); } - logger.info("Last_update_path " + toReadPath); + logger.info("Last_update_path {}", toReadPath); FSDataInputStream inputStream = new FSDataInputStream(fileSystem.open(toReadPath)); - BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); - StringBuffer sb = new StringBuffer(); - try { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { + StringBuilder sb = new StringBuilder(); + String line; while ((line = br.readLine()) != null) { sb.append(line); } - } finally { - br.close(); + + String buffer = sb.toString(); + logger.info("Last_update: {}", buffer); + return buffer; } - String buffer = sb.toString(); - logger.info("Last_update: " + buffer); - return buffer; } public static void writeToTextFile(String hdfsServerUri, String workingPath, String path, String text) @@ -60,8 +55,8 @@ public class HDFSUtil { fileSystem.delete(toWritePath, true); } FSDataOutputStream os = fileSystem.create(toWritePath); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); - br.write(text); - br.close(); + try (BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8))) { + br.write(text); + } } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java index 52e076105..e8745aa96 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParser.java @@ -1,7 +1,6 @@ package eu.dnetlib.doiboost.orcid.xml; -import java.io.IOException; import java.util.*; import org.apache.commons.lang3.StringUtils; @@ -38,6 +37,9 @@ public class XMLRecordParser { private static final String NS_ERROR = "error"; + private XMLRecordParser() { + } + public static AuthorData VTDParseAuthorData(byte[] bytes) throws VtdException, ParseException { final VTDGen vg = new VTDGen(); @@ -90,46 +92,6 @@ public class XMLRecordParser { authorData.setOtherNames(otherNames); } -// final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); -// if (StringUtils.isNoneBlank(creationMethod)) { -// authorData.setCreationMethod(creationMethod); -// } -// -// final String completionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:completion-date"); -// if (StringUtils.isNoneBlank(completionDate)) { -// authorData.setCompletionDate(completionDate); -// } -// -// final String submissionDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:submission-date"); -// if (StringUtils.isNoneBlank(submissionDate)) { -// authorData.setSubmissionDate(submissionDate); -// } -// -// final 
String claimed = VtdUtilityParser.getSingleValue(ap, vn, "//history:claimed"); -// if (StringUtils.isNoneBlank(claimed)) { -// authorData.setClaimed(Boolean.parseBoolean(claimed)); -// } -// -// final String verifiedEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-email"); -// if (StringUtils.isNoneBlank(verifiedEmail)) { -// authorData.setVerifiedEmail(Boolean.parseBoolean(verifiedEmail)); -// } -// -// final String verifiedPrimaryEmail = VtdUtilityParser.getSingleValue(ap, vn, "//history:verified-primary-email"); -// if (StringUtils.isNoneBlank(verifiedPrimaryEmail)) { -// authorData.setVerifiedPrimaryEmail(Boolean.parseBoolean(verifiedPrimaryEmail)); -// } -// -// final String deactivationDate = VtdUtilityParser.getSingleValue(ap, vn, "//history:deactivation-date"); -// if (StringUtils.isNoneBlank(deactivationDate)) { -// authorData.setDeactivationDate(deactivationDate); -// } -// -// final String lastModifiedDate = VtdUtilityParser -// .getSingleValue(ap, vn, "//history:history/common:last-modified-date"); -// if (StringUtils.isNoneBlank(lastModifiedDate)) { -// authorData.setLastModifiedDate(lastModifiedDate); -// } return authorData; } @@ -207,7 +169,7 @@ public class XMLRecordParser { } public static Map retrieveWorkIdLastModifiedDate(byte[] bytes) - throws ParseException, XPathParseException, NavException, XPathEvalException, IOException { + throws ParseException, XPathParseException, NavException, XPathEvalException { final VTDGen vg = new VTDGen(); vg.setDoc(bytes); vg.parse(true); @@ -251,15 +213,15 @@ public class XMLRecordParser { ap.declareXPathNameSpace(NS_ERROR, NS_ERROR_URL); ap.declareXPathNameSpace(NS_HISTORY, NS_HISTORY_URL); - AuthorData authorData = retrieveAuthorData(ap, vn, bytes); - AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn, bytes); + AuthorData authorData = retrieveAuthorData(ap, vn); + AuthorHistory authorHistory = retrieveAuthorHistory(ap, vn); AuthorSummary authorSummary = new AuthorSummary(); authorSummary.setAuthorData(authorData); authorSummary.setAuthorHistory(authorHistory); return authorSummary; } - private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorData retrieveAuthorData(AutoPilot ap, VTDNav vn) throws VtdException { AuthorData authorData = new AuthorData(); final List errors = VtdUtilityParser.getTextValue(ap, vn, "//error:response-code"); @@ -300,7 +262,7 @@ public class XMLRecordParser { return authorData; } - private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn, byte[] bytes) + private static AuthorHistory retrieveAuthorHistory(AutoPilot ap, VTDNav vn) throws VtdException { AuthorHistory authorHistory = new AuthorHistory(); final String creationMethod = VtdUtilityParser.getSingleValue(ap, vn, "//history:creation-method"); diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java index 124a1b9ef..ddbf71bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/ActivitiesDumpReader.java @@ -33,6 +33,9 @@ public class ActivitiesDumpReader { private static final int MAX_XML_WORKS_PARSED = -1; private static final int XML_WORKS_PARSED_COUNTER_LOG_INTERVAL = 100000; + private ActivitiesDumpReader() { + } + public static void parseGzActivities(Configuration conf, 
String inputUri, Path outputPath) throws Exception { String uri = inputUri; @@ -48,7 +51,7 @@ public class ActivitiesDumpReader { InputStream gzipInputStream = null; try { gzipInputStream = codec.createInputStream(fs.open(inputPath)); - parseTarActivities(fs, conf, gzipInputStream, outputPath); + parseTarActivities(conf, gzipInputStream, outputPath); } finally { Log.debug("Closing gzip stream"); @@ -56,8 +59,7 @@ public class ActivitiesDumpReader { } } - private static void parseTarActivities( - FileSystem fs, Configuration conf, InputStream gzipInputStream, Path outputPath) { + private static void parseTarActivities(Configuration conf, InputStream gzipInputStream, Path outputPath) { int counter = 0; int noDoiFound = 0; int errorFromOrcidFound = 0; @@ -73,7 +75,7 @@ public class ActivitiesDumpReader { SequenceFile.Writer.valueClass(Text.class))) { while ((entry = tais.getNextTarEntry()) != null) { String filename = entry.getName(); - StringBuffer buffer = new StringBuffer(); + StringBuilder builder = new StringBuilder(); try { if (entry.isDirectory() || !filename.contains("works")) { @@ -83,12 +85,12 @@ public class ActivitiesDumpReader { BufferedReader br = new BufferedReader(new InputStreamReader(tais)); // Read directly from // tarInput String line; - buffer = new StringBuffer(); + builder = new StringBuilder(); while ((line = br.readLine()) != null) { - buffer.append(line); + builder.append(line); } WorkDetail workDetail = XMLRecordParserNoDoi - .VTDParseWorkData(buffer.toString().getBytes()); + .VTDParseWorkData(builder.toString().getBytes()); if (workDetail != null) { if (workDetail.getErrorCode() != null) { errorFromOrcidFound += 1; @@ -123,7 +125,7 @@ public class ActivitiesDumpReader { } } else { - Log.warn("Data not retrievable [" + entry.getName() + "] " + buffer); + Log.warn("Data not retrievable [" + entry.getName() + "] " + builder); xmlParserErrorFound += 1; } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java index 4a64124d1..5c23a33a8 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/GenOrcidAuthorWork.java @@ -3,9 +3,9 @@ package eu.dnetlib.doiboost.orcidnodoi; import java.io.IOException; +import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.mortbay.log.Log; @@ -16,7 +16,6 @@ import eu.dnetlib.doiboost.orcid.OrcidDSManager; * This job generates one sequence file, the key is an orcid identifier and the * value is an orcid publication in json format */ - public class GenOrcidAuthorWork extends OrcidDSManager { private String activitiesFileNameTarGz; @@ -30,13 +29,12 @@ public class GenOrcidAuthorWork extends OrcidDSManager { public void generateAuthorsDOIsData() throws Exception { Configuration conf = initConfigurationObject(); - FileSystem fs = initFileSystemObject(conf); String tarGzUri = hdfsServerUri.concat(workingPath).concat(activitiesFileNameTarGz); Path outputPath = new Path(hdfsServerUri.concat(workingPath).concat(outputWorksPath)); ActivitiesDumpReader.parseGzActivities(conf, tarGzUri, outputPath); } - private void loadArgs(String[] args) throws Exception { + private void loadArgs(String[] args) throws 
ParseException, IOException { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java index 1d47808ef..db3b14923 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/SparkGenEnrichedOrcidWorks.java @@ -3,7 +3,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -14,21 +14,16 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.util.LongAccumulator; -import org.mortbay.log.Log; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import com.google.gson.JsonElement; -import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; @@ -75,7 +70,7 @@ public class SparkGenEnrichedOrcidWorks { spark -> { String lastUpdate = HDFSUtil.readFromTextFile(hdfsServerUri, workingPath, "last_update.txt"); if (StringUtils.isBlank(lastUpdate)) { - throw new RuntimeException("last update info not found"); + throw new FileNotFoundException("last update info not found"); } final String dateOfCollection = lastUpdate.substring(0, 10); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); @@ -86,10 +81,10 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/authors/*")) .map(item -> OBJECT_MAPPER.readValue(item, AuthorSummary.class)) .filter(authorSummary -> authorSummary.getAuthorData() != null) - .map(authorSummary -> authorSummary.getAuthorData()) + .map(AuthorSummary::getAuthorData) .rdd(), Encoders.bean(AuthorData.class)); - logger.info("Authors data loaded: " + authorDataset.count()); + logger.info("Authors data loaded: {}", authorDataset.count()); Dataset workDataset = spark .createDataset( @@ -97,7 +92,7 @@ public class SparkGenEnrichedOrcidWorks { .textFile(workingPath.concat(orcidDataFolder).concat("/works/*")) .map(item -> OBJECT_MAPPER.readValue(item, Work.class)) .filter(work -> work.getWorkDetail() != null) - .map(work -> work.getWorkDetail()) + .map(Work::getWorkDetail) .filter(work -> work.getErrorCode() == null) .filter( work -> work @@ -107,7 +102,7 @@ public class SparkGenEnrichedOrcidWorks { .noneMatch(e -> e.getType().equalsIgnoreCase("doi"))) .rdd(), Encoders.bean(WorkDetail.class)); - logger.info("Works data loaded: " + workDataset.count()); + logger.info("Works data loaded: {}", workDataset.count()); final LongAccumulator warnNotFoundContributors = spark .sparkContext() @@ -122,7 +117,7 @@ public class SparkGenEnrichedOrcidWorks 
{ WorkDetail w = value._1; AuthorData a = value._2; if (w.getContributors() == null - || (w.getContributors() != null && w.getContributors().size() == 0)) { + || (w.getContributors() != null && w.getContributors().isEmpty())) { Contributor c = new Contributor(); c.setName(a.getName()); c.setSurname(a.getSurname()); @@ -163,7 +158,6 @@ public class SparkGenEnrichedOrcidWorks { final PublicationToOaf publicationToOaf = new PublicationToOaf( parsedPublications, enrichedPublications, - errorsGeneric, errorsInvalidTitle, errorsNotFoundAuthors, errorsInvalidType, @@ -172,13 +166,10 @@ public class SparkGenEnrichedOrcidWorks { titleNotProvidedAcc, noUrlAcc, dateOfCollection); + JavaRDD oafPublicationRDD = enrichedWorksRDD - .map( - e -> { - return (Publication) publicationToOaf - .generatePublicationActionsFromJson(e._2()); - }) - .filter(p -> p != null); + .map(e -> (Publication) publicationToOaf.generatePublicationActionsFromJson(e._2())) + .filter(Objects::nonNull); sc.hadoopConfiguration().set("mapreduce.output.fileoutputformat.compress", "true"); @@ -186,7 +177,7 @@ public class SparkGenEnrichedOrcidWorks { .mapToPair( p -> new Tuple2<>(p.getClass().toString(), OBJECT_MAPPER.writeValueAsString(new AtomicAction<>(Publication.class, p)))) - .mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2()))) + .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))) .saveAsNewAPIHadoopFile( outputEnrichedWorksPath, Text.class, @@ -194,17 +185,17 @@ public class SparkGenEnrichedOrcidWorks { SequenceFileOutputFormat.class, sc.hadoopConfiguration()); - logger.info("parsedPublications: " + parsedPublications.value().toString()); - logger.info("enrichedPublications: " + enrichedPublications.value().toString()); - logger.info("warnNotFoundContributors: " + warnNotFoundContributors.value().toString()); - logger.info("errorsGeneric: " + errorsGeneric.value().toString()); - logger.info("errorsInvalidTitle: " + errorsInvalidTitle.value().toString()); - logger.info("errorsNotFoundAuthors: " + errorsNotFoundAuthors.value().toString()); - logger.info("errorsInvalidType: " + errorsInvalidType.value().toString()); - logger.info("otherTypeFound: " + otherTypeFound.value().toString()); - logger.info("deactivatedAcc: " + deactivatedAcc.value().toString()); - logger.info("titleNotProvidedAcc: " + titleNotProvidedAcc.value().toString()); - logger.info("noUrlAcc: " + noUrlAcc.value().toString()); + logger.info("parsedPublications: {}", parsedPublications.value()); + logger.info("enrichedPublications: {}", enrichedPublications.value()); + logger.info("warnNotFoundContributors: {}", warnNotFoundContributors.value()); + logger.info("errorsGeneric: {}", errorsGeneric.value()); + logger.info("errorsInvalidTitle: {}", errorsInvalidTitle.value()); + logger.info("errorsNotFoundAuthors: {}", errorsNotFoundAuthors.value()); + logger.info("errorsInvalidType: {}", errorsInvalidType.value()); + logger.info("otherTypeFound: {}", otherTypeFound.value()); + logger.info("deactivatedAcc: {}", deactivatedAcc.value()); + logger.info("titleNotProvidedAcc: {}", titleNotProvidedAcc.value()); + logger.info("noUrlAcc: {}", noUrlAcc.value()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java index 23e9dd884..33f3b3bbb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java +++ 
b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/json/JsonWriter.java @@ -18,6 +18,9 @@ public class JsonWriter { public static final com.fasterxml.jackson.databind.ObjectMapper OBJECT_MAPPER = new ObjectMapper() .setSerializationInclusion(JsonInclude.Include.NON_NULL); + private JsonWriter() { + } + public static String create(AuthorData authorData) throws JsonProcessingException { return OBJECT_MAPPER.writeValueAsString(authorData); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java index 215753899..f92040c24 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/oaf/PublicationToOaf.java @@ -18,6 +18,7 @@ import com.google.gson.*; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.doiboost.orcidnodoi.util.DumpToActionsUtility; @@ -26,21 +27,19 @@ import eu.dnetlib.doiboost.orcidnodoi.util.Pair; /** * This class converts an orcid publication from json format to oaf */ - public class PublicationToOaf implements Serializable { static Logger logger = LoggerFactory.getLogger(PublicationToOaf.class); - public final static String orcidPREFIX = "orcid_______"; + public static final String orcidPREFIX = "orcid_______"; public static final String OPENAIRE_PREFIX = "openaire____"; - public static final String SEPARATOR = "::"; + public static final String SEPARATOR = IdentifierFactory.ID_SEPARATOR; public static final String DEACTIVATED_NAME = "Given Names Deactivated"; public static final String DEACTIVATED_SURNAME = "Family Name Deactivated"; private String dateOfCollection = ""; private final LongAccumulator parsedPublications; private final LongAccumulator enrichedPublications; - private final LongAccumulator errorsGeneric; private final LongAccumulator errorsInvalidTitle; private final LongAccumulator errorsNotFoundAuthors; private final LongAccumulator errorsInvalidType; @@ -52,7 +51,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf( LongAccumulator parsedPublications, LongAccumulator enrichedPublications, - LongAccumulator errorsGeneric, LongAccumulator errorsInvalidTitle, LongAccumulator errorsNotFoundAuthors, LongAccumulator errorsInvalidType, @@ -63,7 +61,6 @@ public class PublicationToOaf implements Serializable { String dateOfCollection) { this.parsedPublications = parsedPublications; this.enrichedPublications = enrichedPublications; - this.errorsGeneric = errorsGeneric; this.errorsInvalidTitle = errorsInvalidTitle; this.errorsNotFoundAuthors = errorsNotFoundAuthors; this.errorsInvalidType = errorsInvalidType; @@ -77,7 +74,6 @@ public class PublicationToOaf implements Serializable { public PublicationToOaf() { this.parsedPublications = null; this.enrichedPublications = null; - this.errorsGeneric = null; this.errorsInvalidTitle = null; this.errorsNotFoundAuthors = null; this.errorsInvalidType = null; @@ -88,19 +84,8 @@ public class PublicationToOaf implements Serializable { this.dateOfCollection = null; } - private static final Map> datasources = new HashMap>() { - - { - put( - 
ModelConstants.ORCID, - new Pair<>(ModelConstants.ORCID.toUpperCase(), OPENAIRE_PREFIX + SEPARATOR + ModelConstants.ORCID)); - - } - }; - // json external id will be mapped to oaf:pid/@classid Map to oaf:pid/@classname private static final Map> externalIds = new HashMap>() { - { put("ark".toLowerCase(), new Pair<>("ark", "ark")); put("arxiv".toLowerCase(), new Pair<>("arXiv", "arXiv")); @@ -208,10 +193,8 @@ public class PublicationToOaf implements Serializable { .setTitle( titles .stream() - .map(t -> { - return mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null); - }) - .filter(s -> s != null) + .map(t -> mapStructuredProperty(t, ModelConstants.MAIN_TITLE_QUALIFIER, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); // Adding identifier final String id = getStringValue(rootElement, "id"); @@ -226,7 +209,7 @@ public class PublicationToOaf implements Serializable { publication.setId(sourceId); // Adding relevant date - settingRelevantDate(rootElement, publication, "publication_date", "issued", true); + settingRelevantDate(rootElement, publication, "issued", true); // Adding collectedfrom publication.setCollectedfrom(Arrays.asList(createCollectedFrom())); @@ -243,7 +226,7 @@ public class PublicationToOaf implements Serializable { Map publicationType = typologiesMapping.get(type); if ((publicationType == null || publicationType.isEmpty()) && errorsInvalidType != null) { errorsInvalidType.add(1); - logger.error("publication_type_not_found: " + type); + logger.error("publication_type_not_found: {}", type); return null; } @@ -307,7 +290,7 @@ public class PublicationToOaf implements Serializable { // Adding authors final List authors = createAuthors(rootElement); - if (authors != null && authors.size() > 0) { + if (authors != null && !authors.isEmpty()) { if (authors.stream().filter(a -> { return ((Objects.nonNull(a.getName()) && a.getName().equals(DEACTIVATED_NAME)) || (Objects.nonNull(a.getSurname()) && a.getSurname().equals(DEACTIVATED_SURNAME))); @@ -322,8 +305,7 @@ public class PublicationToOaf implements Serializable { } else { if (authors == null) { Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(rootElement); - throw new RuntimeException("not_valid_authors: " + json); + throw new RuntimeException("not_valid_authors: " + gson.toJson(rootElement)); } else { if (errorsNotFoundAuthors != null) { errorsNotFoundAuthors.add(1); @@ -434,7 +416,6 @@ public class PublicationToOaf implements Serializable { private void settingRelevantDate(final JsonObject rootElement, final Publication publication, - final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) { @@ -450,10 +431,8 @@ public class PublicationToOaf implements Serializable { Arrays .asList(pubDate) .stream() - .map(r -> { - return mapStructuredProperty(r, q, null); - }) - .filter(s -> s != null) + .map(r -> mapStructuredProperty(r, q, null)) + .filter(Objects::nonNull) .collect(Collectors.toList())); } } @@ -498,7 +477,7 @@ public class PublicationToOaf implements Serializable { final String type = getStringValue(rootElement, "type"); if (!typologiesMapping.containsKey(type)) { - logger.error("unknowntype_" + type); + logger.error("unknowntype_{}", type); if (errorsInvalidType != null) { errorsInvalidType.add(1); } @@ -550,7 +529,7 @@ public class PublicationToOaf implements Serializable { } private StructuredProperty mapStructuredProperty(String value, Qualifier qualifier, DataInfo dataInfo) { - if (value == null | 
StringUtils.isBlank(value)) { + if (value == null || StringUtils.isBlank(value)) { return null; } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java index fff753ff3..e69b496b7 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/similarity/AuthorMatcher.java @@ -1,26 +1,14 @@ package eu.dnetlib.doiboost.orcidnodoi.similarity; -import java.io.IOException; import java.text.Normalizer; import java.util.*; import org.apache.commons.lang3.StringUtils; import org.apache.commons.text.similarity.JaroWinklerSimilarity; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.ximpleware.NavException; -import com.ximpleware.ParseException; -import com.ximpleware.XPathEvalException; -import com.ximpleware.XPathParseException; - -import eu.dnetlib.dhp.parser.utility.VtdException; import eu.dnetlib.dhp.schema.orcid.AuthorData; import eu.dnetlib.dhp.schema.orcid.Contributor; -import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for searching from a list of publication contributors a @@ -29,18 +17,16 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; * the match is found (if exist) author informations are used to enrich the * matched contribuotr inside contributors list */ - public class AuthorMatcher { - private static final Logger logger = LoggerFactory.getLogger(AuthorMatcher.class); - public static final Double threshold = 0.8; + public static final Double THRESHOLD = 0.8; - public static void match(AuthorData author, List contributors) - throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { + private AuthorMatcher() { + } + public static void match(AuthorData author, List contributors) { int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); - Contributor contributor = null; contributors .stream() .filter(c -> !StringUtils.isBlank(c.getCreditName())) @@ -62,8 +48,8 @@ public class AuthorMatcher { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -73,14 +59,14 @@ public class AuthorMatcher { } else if (matchCounters.get(0) > 1) { Optional optCon = contributors .stream() - .filter(c -> c.isSimpleMatch()) + .filter(Contributor::isSimpleMatch) .filter(c -> !StringUtils.isBlank(c.getCreditName())) .map(c -> { c.setScore(bestMatch(author.getName(), author.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= threshold) - .max(Comparator.comparing(c -> c.getScore())); + .filter(c -> c.getScore() >= THRESHOLD) + .max(Comparator.comparing(Contributor::getScore)); Contributor bestMatchContributor = null; if (optCon.isPresent()) { bestMatchContributor = optCon.get(); @@ -92,7 +78,7 @@ public class AuthorMatcher { } public static boolean simpleMatchOnOtherNames(String name, List otherNames) { - if (otherNames == null || (otherNames != null && otherNames.isEmpty())) { + if (otherNames == 
null || otherNames.isEmpty()) { return false; } return otherNames.stream().filter(o -> simpleMatch(name, o)).count() > 0; @@ -132,8 +118,7 @@ public class AuthorMatcher { } public static Double similarity(String nameA, String surnameA, String nameB, String surnameB) { - Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB); - return score; + return similarityJaroWinkler(nameA, surnameA, nameB, surnameB); } private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) { @@ -179,7 +164,7 @@ public class AuthorMatcher { public static void updateAuthorsSimilarityMatch(List contributors, AuthorData author) { contributors .stream() - .filter(c -> c.isBestMatch()) + .filter(Contributor::isBestMatch) .forEach(c -> { c.setName(author.getName()); c.setSurname(author.getSurname()); @@ -206,9 +191,4 @@ public class AuthorMatcher { } } - private static String toJson(WorkDetail work) { - GsonBuilder builder = new GsonBuilder(); - Gson gson = builder.create(); - return gson.toJson(work); - } } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java index 29791bbbd..b4c12eed3 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcidnodoi/xml/XMLRecordParserNoDoi.java @@ -5,9 +5,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import com.ximpleware.*; import eu.dnetlib.dhp.parser.utility.VtdException; @@ -20,21 +17,10 @@ import eu.dnetlib.dhp.schema.orcid.WorkDetail; /** * This class is used for parsing xml data with vtd parser */ - public class XMLRecordParserNoDoi { - private static final Logger logger = LoggerFactory.getLogger(XMLRecordParserNoDoi.class); - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; private static final String NS_COMMON = "common"; - private static final String NS_PERSON_URL = "http://www.orcid.org/ns/person"; - private static final String NS_PERSON = "person"; - private static final String NS_DETAILS_URL = "http://www.orcid.org/ns/personal-details"; - private static final String NS_DETAILS = "personal-details"; - private static final String NS_OTHER_URL = "http://www.orcid.org/ns/other-name"; - private static final String NS_OTHER = "other-name"; - private static final String NS_RECORD_URL = "http://www.orcid.org/ns/record"; - private static final String NS_RECORD = "record"; private static final String NS_ERROR_URL = "http://www.orcid.org/ns/error"; private static final String NS_WORK = "work"; @@ -42,6 +28,9 @@ public class XMLRecordParserNoDoi { private static final String NS_ERROR = "error"; + private XMLRecordParserNoDoi() { + } + public static WorkDetail VTDParseWorkData(byte[] bytes) throws VtdException, ParseException, XPathParseException, NavException, XPathEvalException { @@ -100,16 +89,16 @@ public class XMLRecordParserNoDoi { workData.setUrls(urls); } - workData.setPublicationDates(getPublicationDates(vg, vn, ap)); - workData.setExtIds(getExternalIds(vg, vn, ap)); - workData.setContributors(getContributors(vg, vn, ap)); + workData.setPublicationDates(getPublicationDates(vn, ap)); + workData.setExtIds(getExternalIds(vn, ap)); + workData.setContributors(getContributors(vn, ap)); return workData; } - 
private static List getPublicationDates(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getPublicationDates(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List publicationDates = new ArrayList(); + List publicationDates = new ArrayList<>(); int yearIndex = 0; ap.selectXPath("//common:publication-date/common:year"); while (ap.evalXPath() != -1) { @@ -142,9 +131,9 @@ public class XMLRecordParserNoDoi { return publicationDates; } - private static List getExternalIds(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getExternalIds(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List extIds = new ArrayList(); + List extIds = new ArrayList<>(); int typeIndex = 0; ap.selectXPath("//common:external-id/common:external-id-type"); while (ap.evalXPath() != -1) { @@ -177,12 +166,12 @@ public class XMLRecordParserNoDoi { if (typeIndex == valueIndex) { return extIds; } - return new ArrayList(); + return new ArrayList<>(); } - private static List getContributors(VTDGen vg, VTDNav vn, AutoPilot ap) + private static List getContributors(VTDNav vn, AutoPilot ap) throws XPathParseException, NavException, XPathEvalException { - List contributors = new ArrayList(); + List contributors = new ArrayList<>(); ap.selectXPath("//work:contributors/work:contributor"); while (ap.evalXPath() != -1) { Contributor contributor = new Contributor(); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 2b241ed5f..82577e0d8 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,6 +1,7 @@ package eu.dnetlib.doiboost.orcid; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.*; @@ -57,7 +58,7 @@ public class OrcidClientTest { // 'https://api.orcid.org/v3.0/0000-0001-7291-3210/record' @Test - public void downloadTest() throws Exception { + void downloadTest() throws Exception { final String orcid = "0000-0001-7291-3210"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = testPath + "/downloaded_record_".concat(orcid).concat(".xml"); @@ -159,18 +160,18 @@ public class OrcidClientTest { } @Test - private void testReadBase64CompressedRecord() throws Exception { + void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String recordFromSeqFile = ArgumentApplicationParser.decompressValue(base64CompressedRecord); logToFile(testPath, "\n\ndownloaded \n\n" + recordFromSeqFile); final String downloadedRecord = testDownloadRecord("0000-0003-3028-6161", REQUEST_TYPE_RECORD); - assertTrue(recordFromSeqFile.equals(downloadedRecord)); + assertEquals(recordFromSeqFile, downloadedRecord); } @Test @Disabled - public void lambdaFileReaderTest() throws Exception { + void lambdaFileReaderTest() throws Exception { String last_update = "2021-01-12 00:00:06.685137"; TarArchiveInputStream input = new TarArchiveInputStream( new GzipCompressorInputStream(new FileInputStream("/tmp/last_modified.csv.tar"))); @@ -187,7 +188,7 @@ public class OrcidClientTest { while ((line = br.readLine()) != null) { String[] values = 
line.split(","); List recordInfo = Arrays.asList(values); - assertTrue(recordInfo.size() == 4); + assertEquals(4, recordInfo.size()); String orcid = recordInfo.get(0); String modifiedDate = recordInfo.get(3); rowNum++; @@ -264,7 +265,7 @@ public class OrcidClientTest { } @Test - public void downloadWorkTest() throws Exception { + void downloadWorkTest() throws Exception { String orcid = "0000-0003-0015-1952"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_".concat(orcid).concat(".xml"); @@ -274,7 +275,7 @@ public class OrcidClientTest { } @Test - public void downloadRecordTest() throws Exception { + void downloadRecordTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_RECORD); String filename = "/tmp/downloaded_record_".concat(orcid).concat(".xml"); @@ -284,7 +285,7 @@ public class OrcidClientTest { } @Test - public void downloadWorksTest() throws Exception { + void downloadWorksTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORKS); String filename = "/tmp/downloaded_works_".concat(orcid).concat(".xml"); @@ -294,7 +295,7 @@ public class OrcidClientTest { } @Test - public void downloadSingleWorkTest() throws Exception { + void downloadSingleWorkTest() throws Exception { String orcid = "0000-0001-5004-5918"; String record = testDownloadRecord(orcid, REQUEST_TYPE_WORK); String filename = "/tmp/downloaded_work_47652866_".concat(orcid).concat(".xml"); @@ -304,7 +305,7 @@ public class OrcidClientTest { } @Test - public void cleanAuthorListTest() throws Exception { + void cleanAuthorListTest() throws Exception { AuthorData a1 = new AuthorData(); a1.setOid("1"); a1.setName("n1"); @@ -333,7 +334,7 @@ public class OrcidClientTest { @Test @Ignore - public void testUpdatedRecord() throws Exception { + void testUpdatedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); final String record = ArgumentApplicationParser.decompressValue(base64CompressedRecord); @@ -342,7 +343,7 @@ public class OrcidClientTest { @Test @Ignore - private void testUpdatedWork() throws Exception { + void testUpdatedWork() throws Exception { final String base64CompressedWork = 
"H4sIAAAAAAAAAM1XS2/jNhC+51cQOuxJsiXZSR03Vmq0G6Bo013E6R56oyXaZiOJWpKy4y783zvUg5Ksh5uiCJogisX5Zjj85sHx3f1rFKI94YKyeGE4I9tAJPZZQOPtwvj9+cGaGUhIHAc4ZDFZGEcijHvv6u7A+MtcPVCSSgsUQObYzuzaccBEguVuYYxt+LHgbwKP6a11M3WnY6UzrpB7KuiahlQeF0aSrkPqGwhcisWcxpLwGIcLYydlMh+PD4fDiHGfBvDcjmMxLhGlBglSH8vsIH0qGlLqBFRIGvvDWjWQ1iMJJ2CKBANqGlNqMbkj3IpxRPq1KkypFZFoDRHa0aRfq8JoNjhnfIAJJS6xPouiIQJyeYmGQzE+cO5cXqITcItBlKyASExD0a93jiwtvJDjYXDDAqBPHoH2wMmVWGNf8xyyaEBiSTeUDHHWBpd2Nmmc10yfbgHQrHCyIRxKjQwRUoFKPRwEnIgBnQJQVdGeQgJaCRN0OMnPkaUFVbD9WkpaIndQJowf+8EFoIpTErJjBFQOBavElFpfUxwC9ZcqvQErdQXhe+oPFF8BaObupYzVsYEOARzSoZBWmKqaBMHcV0Wf8oG0beIqD+Gdkz0lhyE3NajUW6fhQFSV9Nw/MCBYyofYa0EN7wrBz13eP+Y+J6obWgE8Pdd2JpYD94P77Ezmjj13b0bu5PqPu3EXumEnxEJaEVxSUIHammsra+53z44zt2/m1/bItaeVtQ6dhs3c4XytvW75IYUchMKvEHVUyqmnWBFAS0VJrqSvQde6vp251ux2NtFuKcVOi+oK9YY0M0Cn6o4J6WkvtEK2XJ1vfPGAZxSoK8lb+SxJBbLQx1CohOLndjJUywQWUFmqEi3G6Zaqf/7buOyYJd5IYpfmf0XipfP18pDR9cQCeEuJQI/Lx36bFbVnpBeL2UwmqQw7ApAvf4GeGGQdEbENgolui/wdpjHaYCmPCIPPAmGBIsxfoLUhyRCB0SeCakEBJRKBtfJ+UBbI15TG4PaGBAhWthx8DmFYtHZQujv1CWbLLdzmmUKmHEOWCe1/zdu78bn/+YH+hCOqOzcXfFwuP6OVT/P710crwqGXFrpNaM2GT3MXarw01i15TIi3pmtJXgtbTVGf3h6HKfF+wBAnPyTfdCChudlm5gZaoG//F9pPZsGQcqqbyZN5hBau5OoIJ3PPwjTKDuG4s5MZp2rMzF5PZoK34IT6PIFOPrk+mTiVO5aJH2C+JJRjE/06eoRfpJxa4VgyYaLlaJUv/EhCfATMU/76gEOfmehL/qbJNNHjaFna+CQYB8wvo9PpPFJ5MOrJ1Ix7USBZqBl7KRNOx1d3jex7SG6zuijqCMWRusBsncjZSrM2u82UJmqzpGhvUJN2t6caIM9QQgO9c0t40UROnWsJd2Rbs+nsxpna9u30ttNkjechmzHjEST+X5CkkuNY0GzQkzyFseAf7lSZuLwdh1xSXKvvQJ4g4abTYgPV7uMt3rskohlJmMa82kQkshtyBEIYqQ+YB8X3oRHg7iFKi/bZP+Ao+T6BJhIT/vNPi8ffZs+flk+r2v0WNroZiyWn6xRmadHqTJXsjLJczElAZX6TnJdoWTM1SI2gfutv3rjeBt5t06rVvNuWup29246tlvluO+u2/G92bK9DXheL6uFd/Q3EaRDZqBIAAA=="; final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java index 235db52d4..78760fa96 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/xml/XMLRecordParserTest.java @@ -24,10 +24,7 @@ import eu.dnetlib.doiboost.orcidnodoi.json.JsonWriter; import eu.dnetlib.doiboost.orcidnodoi.xml.XMLRecordParserNoDoi; public class XMLRecordParserTest { - private static final String NS_WORK = "work"; - private static final String NS_WORK_URL = "http://www.orcid.org/ns/work"; - private static final String NS_COMMON_URL = "http://www.orcid.org/ns/common"; - private static final String NS_COMMON = "common"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static Path testPath; @@ -38,12 +35,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidAuthorDataXMLParser() throws Exception { + void testOrcidAuthorDataXMLParser() throws Exception { String xml = IOUtils.toString(this.getClass().getResourceAsStream("summary_0000-0001-6828-479X.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getName()); @@ -54,12 +49,10 @@ public class XMLRecordParserTest { } @Test - public void testOrcidXMLErrorRecordParser() throws Exception { + void testOrcidXMLErrorRecordParser() throws Exception { String xml = 
IOUtils.toString(this.getClass().getResourceAsStream("summary_error.xml")); - XMLRecordParser p = new XMLRecordParser(); - AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getErrorCode()); @@ -67,14 +60,12 @@ public class XMLRecordParserTest { } @Test - public void testOrcidWorkDataXMLParser() throws Exception { + void testOrcidWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); - XMLRecordParser p = new XMLRecordParser(); - WorkData workData = XMLRecordParser.VTDParseWorkData(xml.getBytes()); assertNotNull(workData); assertNotNull(workData.getOid()); @@ -83,7 +74,7 @@ public class XMLRecordParserTest { } @Test - public void testOrcidOtherNamesXMLParser() throws Exception { + void testOrcidOtherNamesXMLParser() throws Exception { String xml = IOUtils .toString( @@ -91,30 +82,13 @@ public class XMLRecordParserTest { AuthorData authorData = XMLRecordParser.VTDParseAuthorData(xml.getBytes()); assertNotNull(authorData); assertNotNull(authorData.getOtherNames()); - assertTrue(authorData.getOtherNames().get(0).equals("Andrew C. Porteus")); + assertEquals("Andrew C. Porteus", authorData.getOtherNames().get(0)); String jsonData = JsonWriter.create(authorData); assertNotNull(jsonData); } -// @Test -// private void testWorkIdLastModifiedDateXMLParser() throws Exception { -// String xml = IOUtils -// .toString( -// this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); -// Map workIdLastModifiedDate = XMLRecordParser.retrieveWorkIdLastModifiedDate(xml.getBytes()); -// workIdLastModifiedDate.forEach((k, v) -> { -// try { -// OrcidClientTest -// .logToFile( -// k + " " + v + " isModified after " + SparkDownloadOrcidWorks.lastUpdateValue + ": " -// + SparkDownloadOrcidWorks.isModified("0000-0001-5004-5918", v)); -// } catch (IOException e) { -// } -// }); -// } - @Test - public void testAuthorSummaryXMLParser() throws Exception { + void testAuthorSummaryXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("record_0000-0001-5004-5918.xml")); @@ -124,7 +98,7 @@ public class XMLRecordParserTest { } @Test - public void testWorkDataXMLParser() throws Exception { + void testWorkDataXMLParser() throws Exception { String xml = IOUtils .toString( this.getClass().getResourceAsStream("activity_work_0000-0003-2760-1191.xml")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 01e26dcb4..1dbb37fca 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -11,36 +11,37 @@ import org.slf4j.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonParser; +import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; import jdk.nashorn.internal.ir.annotations.Ignore; -public class PublicationToOafTest { +class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test @Ignore - private void convertOafPublicationTest() throws Exception { + void convertOafPublicationTest() throws Exception { 
String jsonPublication = IOUtils .toString( PublicationToOafTest.class.getResourceAsStream("publication.json")); JsonElement j = new JsonParser().parse(jsonPublication); - logger.info("json publication loaded: " + j.toString()); + logger.info("json publication loaded: {}", j.toString()); PublicationToOaf publicationToOaf = new PublicationToOaf(); Publication oafPublication = (Publication) publicationToOaf .generatePublicationActionsFromDump(j.getAsJsonObject()); assertNotNull(oafPublication.getId()); assertNotNull(oafPublication.getOriginalId()); - assertEquals(oafPublication.getOriginalId().get(0), "60153327"); - logger.info("oafPublication.getId(): " + oafPublication.getId()); + assertEquals("60153327", oafPublication.getOriginalId().get(0)); + logger.info("oafPublication.getId(): {}", oafPublication.getId()); assertEquals( - oafPublication.getTitle().get(0).getValue(), - "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp"); + "Evaluation of a percutaneous optical fibre glucose sensor (FiberSense) across the glycemic range with rapid glucoseexcursions using the glucose clamp", + oafPublication.getTitle().get(0).getValue()); assertNotNull(oafPublication.getLastupdatetimestamp()); assertNotNull(oafPublication.getDateofcollection()); assertNotNull(oafPublication.getDateoftransformation()); - assertTrue(oafPublication.getAuthor().size() == 7); + assertEquals(7, oafPublication.getAuthor().size()); oafPublication.getAuthor().forEach(a -> { assertNotNull(a.getFullname()); assertNotNull(a.getRank()); @@ -64,15 +65,15 @@ public class PublicationToOafTest { if (oafPublication.getExternalReference() != null) { oafPublication.getExternalReference().forEach(e -> { assertNotNull(e.getRefidentifier()); - assertEquals(e.getQualifier().getSchemeid(), "dnet:pid_types"); + assertEquals(ModelConstants.DNET_PID_TYPES, e.getQualifier().getSchemeid()); }); } assertNotNull(oafPublication.getInstance()); oafPublication.getInstance().forEach(i -> { assertNotNull(i.getInstancetype().getClassid()); - logger.info("i.getInstancetype().getClassid(): " + i.getInstancetype().getClassid()); + logger.info("i.getInstancetype().getClassid(): {}", i.getInstancetype().getClassid()); assertNotNull(i.getInstancetype().getClassname()); - logger.info("i.getInstancetype().getClassname(): " + i.getInstancetype().getClassname()); + logger.info("i.getInstancetype().getClassname(): {}", i.getInstancetype().getClassname()); }); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java index 54c2d6217..99ec656d5 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.doiboost.orcidnodoi.xml; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.util.*; @@ -25,7 +24,7 @@ import eu.dnetlib.dhp.schema.orcid.Contributor; import eu.dnetlib.dhp.schema.orcid.WorkDetail; import eu.dnetlib.doiboost.orcidnodoi.similarity.AuthorMatcher; -public class OrcidNoDoiTest { +class OrcidNoDoiTest { private static final Logger logger = 
LoggerFactory.getLogger(OrcidNoDoiTest.class); @@ -34,7 +33,7 @@ public class OrcidNoDoiTest { static String orcidIdA = "0000-0003-2760-1191"; @Test - public void readPublicationFieldsTest() + void readPublicationFieldsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils @@ -44,10 +43,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -87,7 +82,7 @@ public class OrcidNoDoiTest { } @Test - public void authorDoubleMatchTest() throws Exception { + void authorDoubleMatchTest() throws Exception { logger.info("running authorSimpleMatchTest ...."); String orcidWork = "activity_work_0000-0003-2760-1191-similarity.xml"; AuthorData author = new AuthorData(); @@ -98,71 +93,49 @@ public class OrcidNoDoiTest { .toString( OrcidNoDoiTest.class.getResourceAsStream(orcidWork)); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Abdel-Dayem K")); + assertEquals("Abdel-Dayem K", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 6); + assertEquals(6, workData.getContributors().size()); } @Test - public void readContributorsTest() + void readContributorsTest() throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException { logger.info("running loadPublicationFieldsTest ...."); String xml = IOUtils .toString( OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191_contributors.xml")); - if (xml == null) { - logger.info("Resource not found"); - } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } - WorkDetail workData = null; - try { - workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); - } catch (Exception e) { - logger.error("parsing xml", e); - } + WorkDetail workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); + assertNotNull(workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); assertTrue(StringUtils.isBlank(workData.getContributors().get(0).getCreditName())); - assertTrue(workData.getContributors().get(0).getSequence().equals("seq0")); - assertTrue(workData.getContributors().get(0).getRole().equals("role0")); - assertTrue(workData.getContributors().get(1).getCreditName().equals("creditname1")); + assertEquals("seq0", workData.getContributors().get(0).getSequence()); + assertEquals("role0", workData.getContributors().get(0).getRole()); + assertEquals("creditname1", workData.getContributors().get(1).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getSequence())); 
assertTrue(StringUtils.isBlank(workData.getContributors().get(1).getRole())); - assertTrue(workData.getContributors().get(2).getCreditName().equals("creditname2")); - assertTrue(workData.getContributors().get(2).getSequence().equals("seq2")); + assertEquals("creditname2", workData.getContributors().get(2).getCreditName()); + assertEquals("seq2", workData.getContributors().get(2).getSequence()); assertTrue(StringUtils.isBlank(workData.getContributors().get(2).getRole())); - assertTrue(workData.getContributors().get(3).getCreditName().equals("creditname3")); + assertEquals("creditname3", workData.getContributors().get(3).getCreditName()); assertTrue(StringUtils.isBlank(workData.getContributors().get(3).getSequence())); - assertTrue(workData.getContributors().get(3).getRole().equals("role3")); + assertEquals("role3", workData.getContributors().get(3).getRole()); assertTrue(StringUtils.isBlank(workData.getContributors().get(4).getCreditName())); - assertTrue(workData.getContributors().get(4).getSequence().equals("seq4")); - assertTrue(workData.getContributors().get(4).getRole().equals("role4")); + assertEquals("seq4", workData.getContributors().get(4).getSequence()); + assertEquals("role4", workData.getContributors().get(4).getRole()); } @Test - public void authorSimpleMatchTest() throws Exception { + void authorSimpleMatchTest() throws Exception { String orcidWork = "activity_work_0000-0002-5982-8983.xml"; AuthorData author = new AuthorData(); author.setName("Parkhouse"); @@ -175,10 +148,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -188,20 +157,21 @@ public class OrcidNoDoiTest { assertNotNull(workData); Contributor a = workData.getContributors().get(0); - assertTrue(a.getCreditName().equals("Parkhouse, H.")); + assertEquals("Parkhouse, H.", a.getCreditName()); AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 2); + assertEquals(2, workData.getContributors().size()); Contributor c = workData.getContributors().get(0); - assertTrue(c.getOid().equals("0000-0002-5982-8983")); - assertTrue(c.getName().equals("Parkhouse")); - assertTrue(c.getSurname().equals("H.")); - assertTrue(c.getCreditName().equals("Parkhouse, H.")); + + assertEquals("0000-0002-5982-8983", c.getOid()); + assertEquals("Parkhouse", c.getName()); + assertEquals("H.", c.getSurname()); + assertEquals("Parkhouse, H.", c.getCreditName()); } @Test - public void match() { + void match() { AuthorData author = new AuthorData(); author.setName("Joe"); @@ -210,7 +180,6 @@ public class OrcidNoDoiTest { Contributor contributor = new Contributor(); contributor.setCreditName("Joe Dodge"); List contributors = Arrays.asList(contributor); - AuthorMatcher am = new AuthorMatcher(); int matchCounter = 0; List matchCounters = Arrays.asList(matchCounter); contributors @@ -225,12 +194,13 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters.get(0) == 1); + assertEquals(1, matchCounters.get(0)); AuthorMatcher.updateAuthorsSimpleMatch(contributors, author); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - assertTrue(contributors.get(0).getCreditName().equals("Joe Dodge")); - assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); 
+ + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("Joe Dodge", contributors.get(0).getCreditName()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); AuthorData authorX = new AuthorData(); authorX.setName(nameA); @@ -259,7 +229,7 @@ public class OrcidNoDoiTest { } }); - assertTrue(matchCounters2.get(0) == 2); + assertEquals(2, matchCounters2.get(0)); assertTrue(contributorList.get(0).isSimpleMatch()); assertTrue(contributorList.get(1).isSimpleMatch()); @@ -271,7 +241,7 @@ public class OrcidNoDoiTest { c.setScore(AuthorMatcher.bestMatch(authorX.getName(), authorX.getSurname(), c.getCreditName())); return c; }) - .filter(c -> c.getScore() >= AuthorMatcher.threshold) + .filter(c -> c.getScore() >= AuthorMatcher.THRESHOLD) .max(Comparator.comparing(c -> c.getScore())); assertTrue(optCon.isPresent()); @@ -281,15 +251,16 @@ public class OrcidNoDoiTest { assertTrue(contributorList.get(0).isBestMatch()); assertTrue(!contributorList.get(1).isBestMatch()); AuthorMatcher.updateAuthorsSimilarityMatch(contributorList, authorX); - assertTrue(contributorList.get(0).getName().equals(nameA)); - assertTrue(contributorList.get(0).getSurname().equals(surnameA)); - assertTrue(contributorList.get(0).getCreditName().equals("Abdel-Dayem Khai")); - assertTrue(contributorList.get(0).getOid().equals(orcidIdA)); + + assertEquals(nameA, contributorList.get(0).getName()); + assertEquals(surnameA, contributorList.get(0).getSurname()); + assertEquals("Abdel-Dayem Khai", contributorList.get(0).getCreditName()); + assertEquals(orcidIdA, contributorList.get(0).getOid()); assertTrue(StringUtils.isBlank(contributorList.get(1).getOid())); } @Test - public void authorBestMatchTest() throws Exception { + void authorBestMatchTest() throws Exception { String name = "Khairy"; String surname = "Abdel Dayem"; String orcidWork = "activity_work_0000-0003-2760-1191.xml"; @@ -304,10 +275,6 @@ public class OrcidNoDoiTest { if (xml == null) { logger.info("Resource not found"); } - XMLRecordParserNoDoi p = new XMLRecordParserNoDoi(); - if (p == null) { - logger.info("XMLRecordParserNoDoi null"); - } WorkDetail workData = null; try { workData = XMLRecordParserNoDoi.VTDParseWorkData(xml.getBytes()); @@ -315,16 +282,17 @@ public class OrcidNoDoiTest { logger.error("parsing xml", e); } AuthorMatcher.match(author, workData.getContributors()); - assertTrue(workData.getContributors().size() == 5); + assertEquals(5, workData.getContributors().size()); List c = workData.getContributors(); - assertTrue(c.get(0).getName().equals(name)); - assertTrue(c.get(0).getSurname().equals(surname)); - assertTrue(c.get(0).getCreditName().equals("Khair Abde Daye")); - assertTrue(c.get(0).getOid().equals(orcidIdA)); + + assertEquals(name, c.get(0).getName()); + assertEquals(surname, c.get(0).getSurname()); + assertEquals("Khair Abde Daye", c.get(0).getCreditName()); + assertEquals(orcidIdA, c.get(0).getOid()); } @Test - public void otherNamesMatchTest() + void otherNamesMatchTest() throws VtdException, ParseException, IOException, XPathEvalException, NavException, XPathParseException { AuthorData author = new AuthorData(); @@ -341,8 +309,9 @@ public class OrcidNoDoiTest { contributor.setCreditName("XY"); List contributors = Arrays.asList(contributor); AuthorMatcher.match(author, contributors); - assertTrue(contributors.get(0).getName().equals("Joe")); - assertTrue(contributors.get(0).getSurname().equals("Dodge")); - 
assertTrue(contributors.get(0).getOid().equals("0000-1111-2222-3333")); + + assertEquals("Joe", contributors.get(0).getName()); + assertEquals("Dodge", contributors.get(0).getSurname()); + assertEquals("0000-1111-2222-3333", contributors.get(0).getOid()); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 692605b03..0b4a80b2d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -14,12 +14,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class PropagationConstant { + + private PropagationConstant() { + } + public static final String INSTITUTIONAL_REPO_TYPE = "pubsrepository::institutional"; public static final String PROPAGATION_DATA_INFO_TYPE = "propagation"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index 0f45d3beb..8e76b5778 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -5,16 +5,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.gson.Gson; /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { - private static final Log log = LogFactory.getLog(Community.class); - private String id; private List subjects = new ArrayList<>(); private List providers = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 844fe2962..5d92f5ab6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -2,15 +2,9 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.stream.Collectors; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; @@ -22,8 +16,6 @@ import eu.dnetlib.dhp.bulktag.criteria.Selection; /** Created by miriam on 02/08/2018. 
*/ public class CommunityConfiguration implements Serializable { - private static final Log log = LogFactory.getLog(CommunityConfiguration.class); - private Map communities; // map subject -> communityid @@ -136,7 +128,7 @@ public class CommunityConfiguration implements Serializable { else return null; }) - .filter(st -> (st != null)) + .filter(Objects::nonNull) .collect(Collectors.toList()); } @@ -161,7 +153,7 @@ public class CommunityConfiguration implements Serializable { private List getContextIds(List> list) { if (list != null) { - return list.stream().map(p -> p.getFst()).collect(Collectors.toList()); + return list.stream().map(Pair::getFst).collect(Collectors.toList()); } return Lists.newArrayList(); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 749ed292f..822d35078 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -13,6 +13,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,11 +32,16 @@ public class CommunityConfigurationFactory { private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration newInstance(final String xml) throws DocumentException { + private CommunityConfigurationFactory() { + } + + public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException { log.debug(String.format("parsing community configuration from:\n%s", xml)); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Map communities = Maps.newHashMap(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index e0856ae8f..9002e718f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -13,9 +13,6 @@ public class Constraint implements Serializable { private String value; private Selection selection; - public Constraint() { - } - public String getVerb() { return verb; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index b56dfaaa3..0f6fab238 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -19,11 +19,8 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 02/08/2018. 
 */
 public class Constraints implements Serializable {
 	private static final Log log = LogFactory.getLog(Constraints.class);
-	// private ConstraintEncapsulator ce;
-	private List<Constraint> constraint;
-	public Constraints() {
-	}
+	private List<Constraint> constraint;
 	public List<Constraint> getConstraint() {
 		return constraint;
 	}
@@ -44,13 +41,8 @@
 		try {
 			st.setSelection(resolver);
-		} catch (NoSuchMethodException e) {
-			log.error(e.getMessage());
-		} catch (IllegalAccessException e) {
-			log.error(e.getMessage());
-		} catch (InvocationTargetException e) {
-			log.error(e.getMessage());
-		} catch (InstantiationException e) {
+		} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException
+			| InstantiationException e) {
 			log.error(e.getMessage());
 		}
 	}
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java
index a9427b594..cb198dc43 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java
@@ -19,10 +19,6 @@ public class Provider implements Serializable {
 	private SelectionConstraints selectionConstraints;
-	public SelectionConstraints getSelCriteria() {
-		return selectionConstraints;
-	}
-
 	public SelectionConstraints getSelectionConstraints() {
 		return selectionConstraints;
 	}
@@ -31,10 +27,6 @@ public class Provider implements Serializable {
 		this.selectionConstraints = selectionConstraints;
 	}
-	public void setSelCriteria(SelectionConstraints selCriteria) {
-		this.selectionConstraints = selCriteria;
-	}
-
 	public String getOpenaireId() {
 		return openaireId;
 	}
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
index 993a7ef77..89cf5beff 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java
@@ -4,6 +4,7 @@ package eu.dnetlib.dhp.bulktag.community;
 import java.util.List;
 import org.dom4j.DocumentException;
+import org.xml.sax.SAXException;
 import com.google.common.base.Joiner;
@@ -63,7 +64,7 @@ public class QueryInformationSystem {
 		+ " ";
 	public static CommunityConfiguration getCommunityConfiguration(final String isLookupUrl)
-		throws ISLookUpException, DocumentException {
+		throws ISLookUpException, DocumentException, SAXException {
 		ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
 		final List<String> res = isLookUp.quickSearchProfile(XQUERY);
diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
index c9ff26963..c8b1bc8fe 100644
--- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
+++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java
@@ -43,7 +43,6 @@ public class ResultTagger implements Serializable {
 				param.put(key, jsonContext.read(params.get(key)));
 			} catch (com.jayway.jsonpath.PathNotFoundException e) {
 				param.put(key, new ArrayList<>());
-				// throw e;
 			}
 		}
 		return param;
@@ -52,9 +51,6 @@
public class ResultTagger implements Serializable { public R enrichContextCriteria( final R result, final CommunityConfiguration conf, final Map criteria) { - // } - // public Result enrichContextCriteria(final Result result, final CommunityConfiguration - // conf, final Map criteria) { final Map> param = getParamMap(result, criteria); // Verify if the entity is deletedbyinference. In case verify if to clean the context list @@ -74,7 +70,7 @@ public class ResultTagger implements Serializable { result .getSubject() .stream() - .map(subject -> subject.getValue()) + .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .map(String::toLowerCase) .map(String::trim) @@ -90,15 +86,11 @@ public class ResultTagger implements Serializable { if (Objects.nonNull(result.getInstance())) { for (Instance i : result.getInstance()) { - if (Objects.nonNull(i.getCollectedfrom())) { - if (Objects.nonNull(i.getCollectedfrom().getKey())) { - tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); - } + if (Objects.nonNull(i.getCollectedfrom()) && Objects.nonNull(i.getCollectedfrom().getKey())) { + tmp.add(StringUtils.substringAfter(i.getCollectedfrom().getKey(), "|")); } - if (Objects.nonNull(i.getHostedby())) { - if (Objects.nonNull(i.getHostedby().getKey())) { - tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); - } + if (Objects.nonNull(i.getHostedby()) && Objects.nonNull(i.getHostedby().getKey())) { + tmp.add(StringUtils.substringAfter(i.getHostedby().getKey(), "|")); } } @@ -149,52 +141,46 @@ public class ResultTagger implements Serializable { return result; } - result - .getContext() - .stream() - .map( - c -> { - if (communities.contains(c.getId())) { - Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); - List dataInfoList; - if (opt_dataInfoList.isPresent()) - dataInfoList = opt_dataInfoList.get(); - else { - dataInfoList = new ArrayList<>(); - c.setDataInfo(dataInfoList); - } - if (subjects.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_SUBJECT, - CLASS_NAME_BULKTAG_SUBJECT, - TAGGING_TRUST)); - if (datasources.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_DATASOURCE, - CLASS_NAME_BULKTAG_DATASOURCE, - TAGGING_TRUST)); - if (czenodo.contains(c.getId())) - dataInfoList - .add( - getDataInfo( - BULKTAG_DATA_INFO_TYPE, - CLASS_ID_CZENODO, - CLASS_NAME_BULKTAG_ZENODO, - TAGGING_TRUST)); - } - return c; - }) - .collect(Collectors.toList()); + result.getContext().forEach(c -> { + if (communities.contains(c.getId())) { + Optional> opt_dataInfoList = Optional.ofNullable(c.getDataInfo()); + List dataInfoList; + if (opt_dataInfoList.isPresent()) + dataInfoList = opt_dataInfoList.get(); + else { + dataInfoList = new ArrayList<>(); + c.setDataInfo(dataInfoList); + } + if (subjects.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_SUBJECT, + CLASS_NAME_BULKTAG_SUBJECT, + TAGGING_TRUST)); + if (datasources.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_DATASOURCE, + CLASS_NAME_BULKTAG_DATASOURCE, + TAGGING_TRUST)); + if (czenodo.contains(c.getId())) + dataInfoList + .add( + getDataInfo( + BULKTAG_DATA_INFO_TYPE, + CLASS_ID_CZENODO, + CLASS_NAME_BULKTAG_ZENODO, + TAGGING_TRUST)); + } + }); communities .removeAll( - result.getContext().stream().map(c -> c.getId()).collect(Collectors.toSet())); + 
result.getContext().stream().map(Context::getId).collect(Collectors.toSet())); if (communities.isEmpty()) return result; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 71ff61d1b..c7dcce812 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -15,9 +15,6 @@ import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; public class SelectionConstraints implements Serializable { private List criteria; - public SelectionConstraints() { - } - public List getCriteria() { return criteria; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index 80d98bb1a..8274e26c9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.community; public class TaggingConstants { + private TaggingConstants() { + } + public static final String BULKTAG_DATA_INFO_TYPE = "bulktagging"; public static final String CLASS_ID_SUBJECT = "community:subject"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index bc6b75fba..54c2dc9aa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -31,7 +31,6 @@ public class ZenodoCommunity implements Serializable { } private void setSelCriteria(String json) { - // Type collectionType = new TypeToken>(){}.getType(); selCriteria = new Gson().fromJson(json, SelectionConstraints.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java index ec9fb716d..9129e6e54 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/Selection.java @@ -1,7 +1,9 @@ package eu.dnetlib.dhp.bulktag.criteria; -public interface Selection { +import java.io.Serializable; + +public interface Selection extends Serializable { boolean apply(String value); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 54176efb6..459ac1ba9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -7,16 +7,16 @@ import java.util.Map; import java.util.stream.Collectors; import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; import io.github.classgraph.ClassInfoList; import io.github.classgraph.ScanResult; public class 
VerbResolver implements Serializable { - private Map> map = null; // = new HashMap<>(); - private final ClassGraph classgraph = new ClassGraph(); + + private Map> map = null; public VerbResolver() { + final ClassGraph classgraph = new ClassGraph(); try (ScanResult scanResult = // Assign scanResult in try-with-resources classgraph // Create a new ClassGraph instance .verbose() // If you want to enable logging to stderr @@ -41,8 +41,6 @@ public class VerbResolver implements Serializable { .get(0) .getValue(), value -> (Class) value.loadClass())); - } catch (Exception e) { - e.printStackTrace(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java index 0bb801999..446ad5fbc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolverFactory.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.bulktag.criteria; public class VerbResolverFactory { + private VerbResolverFactory() { + } + public static VerbResolver newInstance() { return new VerbResolver(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index 04a659a1c..ddc7f93f7 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -6,11 +6,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,11 +17,11 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Datasource; +import eu.dnetlib.dhp.schema.oaf.Organization; +import eu.dnetlib.dhp.schema.oaf.Relation; /** * For the association of the country to the datasource The association is computed only for datasource of specific type @@ -77,16 +76,16 @@ public class PrepareDatasourceCountryAssociation { List allowedtypes, String inputPath, String outputPath) { - String whitelisted = " d.id = '" + whitelist.get(0) + "'"; - for (int i = 1; i < whitelist.size(); i++) { - whitelisted += " OR d.id = '" + whitelist.get(i) + "'"; - } - String allowed = "d.datasourcetype.classid = '" + allowedtypes.get(0) + "'"; + final String whitelisted = whitelist + .stream() + .map(id -> " d.id = '" + id + "'") + .collect(Collectors.joining(" OR ")); - for (int i = 1; i < allowedtypes.size(); i++) { - allowed += " OR d.datasourcetype.classid = '" + allowedtypes.get(i) + "'"; - } + final String allowed = allowedtypes + .stream() + 
.map(type -> " d.datasourcetype.classid = '" + type + "'") + .collect(Collectors.joining(" OR ")); Dataset datasource = readPath(spark, inputPath + "/datasource", Datasource.class); Dataset relation = readPath(spark, inputPath + "/relation", Relation.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 8d0d6c48b..77f7288f6 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -85,13 +85,12 @@ public class PrepareResultCountrySet { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); - // log.info("number of results: {}", result.count()); + createCfHbforResult(spark); Dataset datasource_country = readPath(spark, datasourcecountrypath, DatasourceCountry.class); datasource_country.createOrReplaceTempView("datasource_country"); - // log.info("datasource_country number : {}", datasource_country.count()); spark .sql(RESULT_COUNTRYSET_QUERY) @@ -102,7 +101,7 @@ public class PrepareResultCountrySet { ArrayList countryList = a.getCountrySet(); Set countryCodes = countryList .stream() - .map(country -> country.getClassid()) + .map(CountrySbs::getClassid) .collect(Collectors.toSet()); b .getCountrySet() @@ -119,10 +118,6 @@ public class PrepareResultCountrySet { }) .map(couple -> OBJECT_MAPPER.writeValueAsString(couple._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .option("compression", "gzip") -// .mode(SaveMode.Append) -// .json(outputPath); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index 97e0a33e1..4aa48583f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.countrypropagation; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +19,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; @@ -28,8 +29,6 @@ public class SparkCountryPropagationJob { private static final Logger log = LoggerFactory.getLogger(SparkCountryPropagationJob.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -90,7 +89,6 @@ public class SparkCountryPropagationJob { boolean saveGraph) { if (saveGraph) { - // updateResultTable(spark, potentialUpdates, inputPath, resultClazz, outputPath); 
log.info("Reading Graph table from: {}", sourcePath); Dataset res = readPath(spark, sourcePath, resultClazz); @@ -122,7 +120,7 @@ public class SparkCountryPropagationJob { private static List merge(List c1, List c2) { HashSet countries = c1 .stream() - .map(c -> c.getClassid()) + .map(Qualifier::getClassid) .collect(Collectors.toCollection(HashSet::new)); return c2 diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index b3ef3a112..95b870292 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -8,9 +8,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; @@ -18,7 +16,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index 2cea32e58..c60012a74 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -87,7 +87,7 @@ public class PrepareResultOrcidAssociationStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 436a53cbe..40faef7f3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -18,7 +18,6 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -141,34 +140,31 @@ public class SparkOrcidToResultFromSemRelJob { author_surname = author.getSurname(); } if (StringUtils.isNotEmpty(author_surname)) { + // have the same surname. Check the name if (autoritative_author .getSurname() .trim() - .equalsIgnoreCase(author_surname.trim())) { - - // have the same surname. 
Check the name - if (StringUtils.isNotEmpty(autoritative_author.getName())) { - if (StringUtils.isNotEmpty(author.getName())) { - author_name = author.getName(); + .equalsIgnoreCase(author_surname.trim()) && StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { + author_name = author.getName(); + } + if (StringUtils.isNotEmpty(author_name)) { + if (autoritative_author + .getName() + .trim() + .equalsIgnoreCase(author_name.trim())) { + toaddpid = true; } - if (StringUtils.isNotEmpty(author_name)) { + // they could be differently written (i.e. only the initials of the name + // in one of the two + else { if (autoritative_author .getName() .trim() - .equalsIgnoreCase(author_name.trim())) { + .substring(0, 0) + .equalsIgnoreCase(author_name.trim().substring(0, 0))) { toaddpid = true; } - // they could be differently written (i.e. only the initials of the name - // in one of the two - else { - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author_name.trim().substring(0, 0))) { - toaddpid = true; - } - } } } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index 27ff727fd..ac61e26f9 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -2,30 +2,25 @@ package eu.dnetlib.dhp.projecttoresult; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.PropagationConstant.getConstraintList; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.Arrays; import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; public class PrepareProjectResultsAssociation { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(PrepareProjectResultsAssociation.class); public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index c57abb451..1ec521af1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -5,28 +5,27 @@ import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.ArrayList; -import java.util.Iterator; import 
java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.countrypropagation.PrepareDatasourceCountryAssociation; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; public class SparkResultToProjectThroughSemRelJob { - private static final Logger log = LoggerFactory.getLogger(PrepareDatasourceCountryAssociation.class); + private static final Logger log = LoggerFactory.getLogger(SparkResultToProjectThroughSemRelJob.class); public static void main(String[] args) throws Exception { @@ -95,26 +94,21 @@ public class SparkResultToProjectThroughSemRelJob { private static FlatMapFunction, Relation> mapRelationRn() { return value -> { - List new_relations = new ArrayList<>(); - ResultProjectSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - if (already_linked.isPresent()) { - already_linked - .get() - .getProjectSet() - .stream() - .forEach( - (p -> { - potential_update.getProjectSet().remove(p); - })); - } - String resId = potential_update.getResultId(); - potential_update + List newRelations = new ArrayList<>(); + ResultProjectSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + alreadyLinked + .ifPresent( + resultProjectSet -> resultProjectSet + .getProjectSet() + .forEach( + (p -> potentialUpdate.getProjectSet().remove(p)))); + String resId = potentialUpdate.getResultId(); + potentialUpdate .getProjectSet() - .stream() .forEach( projectId -> { - new_relations + newRelations .add( getRelation( resId, @@ -125,7 +119,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); - new_relations + newRelations .add( getRelation( projectId, @@ -137,7 +131,7 @@ public class SparkResultToProjectThroughSemRelJob { PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_ID, PROPAGATION_RELATION_RESULT_PROJECT_SEM_REL_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index a5f84cd2f..1a008797d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -10,11 +10,12 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.sql.*; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -109,10 +110,6 @@ public class PrepareResultCommunitySet { }) .map(value -> OBJECT_MAPPER.writeValueAsString(value._2())) .saveAsTextFile(outputPath, GzipCodec.class); -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .json(outputPath); } private static MapFunction mapResultCommunityFn( @@ -131,7 +128,7 @@ public class PrepareResultCommunitySet { communitySet.addAll(organizationMap.get(oId)); } } - if (communitySet.size() > 0) { + if (!communitySet.isEmpty()) { ResultCommunityList rcl = new ResultCommunityList(); rcl.setResultId(rId); ArrayList communityList = new ArrayList<>(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index 7201a30f6..cb80a90ca 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -4,7 +4,10 @@ package eu.dnetlib.dhp.resulttocommunityfromorganization; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -17,10 +20,9 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Context; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToCommunityFromOrganizationJob { @@ -59,6 +61,7 @@ public class SparkResultToCommunityFromOrganizationJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -106,9 +109,12 @@ public class SparkResultToCommunityFromOrganizationJob { List contextList = ret .getContext() .stream() - .map(con -> con.getId()) + .map(Context::getId) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R res = (R) ret.getClass().newInstance(); + res.setId(ret.getId()); List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 09340369d..0ddb19a1a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -11,7 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; 
import org.apache.spark.SparkConf; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +90,7 @@ public class PrepareResultCommunitySetStep2 { }); return a; }) - .map(c -> c._2()) + .map(Tuple2::_2) .map(r -> OBJECT_MAPPER.writeValueAsString(r)) .saveAsTextFile(outputPath, GzipCodec.class); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 4cb241ef2..3690351fb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -62,6 +62,7 @@ public class SparkResultToCommunityThroughSemRelJob { .orElse(Boolean.TRUE); log.info("saveGraph: {}", saveGraph); + @SuppressWarnings("unchecked") Class resultClazz = (Class) Class.forName(resultClassName); runWithSparkHiveSession( @@ -105,15 +106,15 @@ public class SparkResultToCommunityThroughSemRelJob { R ret = value._1(); Optional rcl = Optional.ofNullable(value._2()); if (rcl.isPresent()) { - Set context_set = new HashSet<>(); - ret.getContext().stream().forEach(c -> context_set.add(c.getId())); + Set contexts = new HashSet<>(); + ret.getContext().forEach(c -> contexts.add(c.getId())); List contextList = rcl .get() .getCommunityList() .stream() .map( c -> { - if (!context_set.contains(c)) { + if (!contexts.contains(c)) { Context newContext = new Context(); newContext.setId(c); newContext @@ -130,7 +131,10 @@ public class SparkResultToCommunityThroughSemRelJob { }) .filter(Objects::nonNull) .collect(Collectors.toList()); + + @SuppressWarnings("unchecked") R r = (R) ret.getClass().newInstance(); + r.setId(ret.getId()); r.setContext(contextList); ret.mergeFrom(r); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 3cf36e572..0ef5ca181 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; @@ -91,13 +92,11 @@ public class PrepareResultInstRepoAssociation { private static void prepareDatasourceOrganization( SparkSession spark, String datasourceOrganizationPath, List blacklist) { - String blacklisted = ""; - if (blacklist.size() > 0) { - blacklisted = " AND id != '" + blacklist.get(0) + "'"; - for (int i = 1; i < blacklist.size(); i++) { - blacklisted += " AND id != '" + blacklist.get(i) + "'"; - } - } + + final String blacklisted = blacklist + .stream() + .map(s -> " AND id != '" + s + "'") + .collect(Collectors.joining()); String query = "SELECT source datasourceId, target organizationId " + "FROM ( 
SELECT id " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 01d7b85e4..63824f1a8 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -4,23 +4,24 @@ package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.broadcast.Broadcast; -import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; public class SparkResultToOrganizationFromIstRepoJob { @@ -84,7 +85,6 @@ public class SparkResultToOrganizationFromIstRepoJob { conf, isSparkSessionManaged, spark -> { - // removeOutputDir(spark, outputPath); if (saveGraph) { execPropagation( spark, @@ -105,9 +105,9 @@ public class SparkResultToOrganizationFromIstRepoJob { String outputPath, Class clazz) { - Dataset ds_org = readPath(spark, datasourceorganization, DatasourceOrganization.class); + Dataset dsOrg = readPath(spark, datasourceorganization, DatasourceOrganization.class); - Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, ds_org); + Dataset potentialUpdates = getPotentialRelations(spark, inputPath, clazz, dsOrg); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultOrganizationSet.class); @@ -125,26 +125,20 @@ public class SparkResultToOrganizationFromIstRepoJob { private static FlatMapFunction, Relation> createRelationFn() { return value -> { - List new_relations = new ArrayList<>(); - ResultOrganizationSet potential_update = value._1(); - Optional already_linked = Optional.ofNullable(value._2()); - List organization_list = potential_update.getOrganizationSet(); - if (already_linked.isPresent()) { - already_linked - .get() - .getOrganizationSet() - .stream() - .forEach( - rId -> { - organization_list.remove(rId); - }); - } - String resultId = potential_update.getResultId(); - organization_list - .stream() + List newRelations = new ArrayList<>(); + ResultOrganizationSet potentialUpdate = value._1(); + Optional alreadyLinked = Optional.ofNullable(value._2()); + List organizations = potentialUpdate.getOrganizationSet(); + alreadyLinked + .ifPresent( + resOrg -> resOrg + .getOrganizationSet() + .forEach(organizations::remove)); + String resultId = potentialUpdate.getResultId(); + organizations .forEach( orgId -> { 
- new_relations + newRelations .add( getRelation( orgId, @@ -155,7 +149,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); - new_relations + newRelations .add( getRelation( resultId, @@ -167,7 +161,7 @@ public class SparkResultToOrganizationFromIstRepoJob { PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_ID, PROPAGATION_RELATION_RESULT_ORGANIZATION_INST_REPO_CLASS_NAME)); }); - return new_relations.iterator(); + return newRelations.iterator(); }; } @@ -175,13 +169,13 @@ public class SparkResultToOrganizationFromIstRepoJob { SparkSession spark, String inputPath, Class resultClazz, - Dataset ds_org) { + Dataset dsOrg) { Dataset result = readPath(spark, inputPath, resultClazz); result.createOrReplaceTempView("result"); createCfHbforResult(spark); - ds_org.createOrReplaceTempView("rels"); + dsOrg.createOrReplaceTempView("rels"); return spark .sql(RESULT_ORGANIZATIONSET_QUERY) diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 72e0a63fa..07299f01a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -90,7 +90,7 @@ public class BulkTagJobTest { } @Test - public void noUpdatesTest() throws Exception { + void noUpdatesTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -128,7 +128,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectNoPreviousContextTest() throws Exception { + void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") .getPath(); @@ -224,7 +224,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { + void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") @@ -306,7 +306,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByDatasourceTest() throws Exception { + void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") .getPath(); @@ -378,7 +378,7 @@ public class BulkTagJobTest { } @Test - public void bulktagByZenodoCommunityTest() throws Exception { + void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() .getResource( "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") @@ -500,7 +500,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceTest() throws Exception { + void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") .getPath(); @@ -628,7 +628,7 @@ public class BulkTagJobTest { } @Test - public void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { + void bulktagBySubjectDatasourceZenodoCommunityTest() throws Exception { SparkBulkTagJob .main( @@ -724,7 +724,7 @@ public class BulkTagJobTest { } 
@Test - public void bulktagDatasourcewithConstraintsTest() throws Exception { + void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() .getResource( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index ca737b79f..861546adb 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -10,6 +10,7 @@ import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.google.gson.Gson; @@ -20,12 +21,12 @@ import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ -public class CommunityConfigurationFactoryTest { +class CommunityConfigurationFactoryTest { private final VerbResolver resolver = new VerbResolver(); @Test - public void parseTest() throws DocumentException, IOException { + void parseTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() @@ -39,7 +40,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void applyVerb() + void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { Constraint sc = new Constraint(); @@ -52,7 +53,7 @@ public class CommunityConfigurationFactoryTest { } @Test - public void loadSelCriteriaTest() throws DocumentException, IOException { + void loadSelCriteriaTest() throws DocumentException, IOException, SAXException { String xml = IOUtils .toString( getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index 88ad43b6b..963ee5529 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -5,7 +5,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; @@ -25,6 +24,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Country; +import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Software; import scala.Tuple2; @@ -67,7 +67,7 @@ public class CountryPropagationJobTest { } @Test - public void testCountryPropagationSoftware() throws Exception { + void testCountryPropagationSoftware() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/countrypropagation/sample/software") .getPath(); @@ -105,7 +105,7 @@ public class CountryPropagationJobTest { Dataset countryExploded = verificationDs .flatMap( (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) - .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); + .map((MapFunction) Qualifier::getClassid, 
Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -119,10 +119,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassid = verificationDs .flatMap((FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -180,10 +179,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( @@ -241,10 +239,9 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() + List> prova = new ArrayList<>(); + List countryList = row.getCountry(); + countryList .forEach( c -> prova .add( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java index 238375197..85db7ecf9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/OrcidPropagationJobTest.java @@ -65,7 +65,7 @@ public class OrcidPropagationJobTest { } @Test - public void noUpdateTest() throws Exception { + void noUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/orcidtoresultfromsemrel/sample/noupdate") .getPath(); @@ -111,7 +111,7 @@ public class OrcidPropagationJobTest { } @Test - public void oneUpdateTest() throws Exception { + void oneUpdateTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { @@ -178,7 +178,7 @@ public class OrcidPropagationJobTest { } @Test - public void twoUpdatesTest() throws Exception { + void twoUpdatesTest() throws Exception { SparkOrcidToResultFromSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index abed028e1..2fe1bc574 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -69,7 +69,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() .getResource( @@ -106,7 +106,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void UpdateTenTest() throws Exception { + void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") @@ -178,7 +178,7 @@ public class ProjectPropagationJobTest { * @throws Exception */ @Test - public void 
UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() .getResource( "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index d739516fc..4dd8b976c 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityFromOrganizationJob() throws Exception { + void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") .getPath(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java index 7709e00a8..0d5b12c80 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/ResultToCommunityJobTest.java @@ -65,7 +65,7 @@ public class ResultToCommunityJobTest { } @Test - public void testSparkResultToCommunityThroughSemRelJob() throws Exception { + void testSparkResultToCommunityThroughSemRelJob() throws Exception { SparkResultToCommunityThroughSemRelJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index cfcccc5f0..fdcb10fb9 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -67,7 +67,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void NoUpdateTest() throws Exception { + void NoUpdateTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -110,7 +110,7 @@ public class ResultToOrganizationJobTest { * @throws Exception */ @Test - public void UpdateNoMixTest() throws Exception { + void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") .getPath(); @@ -176,7 +176,7 @@ public class ResultToOrganizationJobTest { } @Test - public void UpdateMixTest() throws Exception { + void UpdateMixTest() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") .getPath(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index f06059dd5..95aa749b2 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -13,7 +13,7 @@ import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; -public class CleaningRuleMap extends HashMap> implements Serializable { +public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { /** * Creates the mapping for the Oaf types subject to cleaning diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java index 9ba153ba5..5502fd391 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/OafCleaner.java @@ -16,7 +16,7 @@ public class OafCleaner implements Serializable { try { navigate(oaf, mapping); } catch (IllegalAccessException e) { - throw new RuntimeException(e); + throw new IllegalStateException(e); } return oaf; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java index 00ddcb5a8..0d2df11fe 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/MakeTar.java @@ -1,22 +1,21 @@ package eu.dnetlib.dhp.oa.graph.dump; -import java.io.*; +import java.io.IOException; +import java.io.Serializable; import java.util.Optional; -import org.apache.commons.compress.archivers.ar.ArArchiveEntry; -import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.MakeTarArchive; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class MakeTar implements Serializable { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java index d118accba..dc740e811 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystem.java @@ -8,6 +8,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -29,7 +30,7 @@ public 
class QueryInformationSystem { ""; public CommunityMap getCommunityMap() - throws ISLookUpException, DocumentException { + throws ISLookUpException, DocumentException, SAXException { return getMap(isLookUp.quickSearchProfile(XQUERY)); } @@ -42,12 +43,14 @@ public class QueryInformationSystem { this.isLookUp = isLookUpService; } - private CommunityMap getMap(List communityMap) throws DocumentException { + private CommunityMap getMap(List communityMap) throws DocumentException, SAXException { final CommunityMap map = new CommunityMap(); for (String xml : communityMap) { final Document doc; - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); map.put(root.attribute("id").getValue(), root.attribute("label").getValue()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java index d30b3122c..500fe5986 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/ResultMapper.java @@ -93,7 +93,7 @@ public class ResultMapper implements Serializable { .setDocumentationUrl( value .stream() - .map(v -> v.getValue()) + .map(Field::getValue) .collect(Collectors.toList()))); Optional @@ -109,20 +109,20 @@ public class ResultMapper implements Serializable { .setContactgroup( Optional .ofNullable(ir.getContactgroup()) - .map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setContactperson( Optional .ofNullable(ir.getContactperson()) - .map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out .setTool( Optional .ofNullable(ir.getTool()) - .map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) + .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList())) .orElse(null)); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); @@ -132,7 +132,8 @@ public class ResultMapper implements Serializable { Optional .ofNullable(input.getAuthor()) - .ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); + .ifPresent( + ats -> out.setAuthor(ats.stream().map(ResultMapper::getAuthor).collect(Collectors.toList()))); // I do not map Access Right UNKNOWN or OTHER @@ -219,11 +220,12 @@ public class ResultMapper implements Serializable { if (oInst.isPresent()) { if (Constants.DUMPTYPE.COMPLETE.getType().equals(dumpType)) { ((GraphResult) out) - .setInstance(oInst.get().stream().map(i -> getGraphInstance(i)).collect(Collectors.toList())); + .setInstance( + oInst.get().stream().map(ResultMapper::getGraphInstance).collect(Collectors.toList())); } else { ((CommunityResult) out) .setInstance( - oInst.get().stream().map(i -> getCommunityInstance(i)).collect(Collectors.toList())); + oInst.get().stream().map(ResultMapper::getCommunityInstance).collect(Collectors.toList())); } } @@ -422,7 +424,7 @@ public class ResultMapper implements Serializable { Optional .ofNullable(i.getInstancetype()) .ifPresent(value 
-> instance.setType(value.getClassname())); - Optional.ofNullable(i.getUrl()).ifPresent(value -> instance.setUrl(value)); + Optional.ofNullable(i.getUrl()).ifPresent(instance::setUrl); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java index 6ac626518..f86f6918f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SaveCommunityMap.java @@ -15,6 +15,7 @@ import org.apache.hadoop.fs.Path; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; @@ -31,25 +32,23 @@ public class SaveCommunityMap implements Serializable { private static final Logger log = LoggerFactory.getLogger(SaveCommunityMap.class); private final QueryInformationSystem queryInformationSystem; - private final Configuration conf; private final BufferedWriter writer; public SaveCommunityMap(String hdfsPath, String hdfsNameNode, String isLookUpUrl) throws IOException { - conf = new Configuration(); + final Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); Path hdfsWritePath = new Path(hdfsPath); - FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath); + fileSystem.delete(hdfsWritePath, true); } - fsDataOutputStream = fileSystem.create(hdfsWritePath); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); - writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); - + FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); } public static void main(String[] args) throws Exception { @@ -74,10 +73,9 @@ public class SaveCommunityMap implements Serializable { final SaveCommunityMap scm = new SaveCommunityMap(outputPath, nameNode, isLookUpUrl); scm.saveCommunityMap(); - } - private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException { + private void saveCommunityMap() throws ISLookUpException, IOException, DocumentException, SAXException { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(queryInformationSystem.getCommunityMap())); writer.close(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java index fd8262544..ba26b708a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/SendToZenodoHDFS.java @@ -17,9 +17,9 @@ import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; public class SendToZenodoHDFS implements Serializable { - private final static String NEW = "new"; // to be used for a brand new deposition in zenodo - private final static String VERSION = "version"; // to be used to upload a new version of a published deposition - private final static String UPDATE = "update"; // to upload 
content to an open deposition not published + private static final String NEW = "new"; // to be used for a brand new deposition in zenodo + private static final String VERSION = "version"; // to be used to upload a new version of a published deposition + private static final String UPDATE = "update"; // to upload content to an open deposition not published private static final Log log = LogFactory.getLog(SendToZenodoHDFS.class); @@ -85,7 +85,6 @@ public class SendToZenodoHDFS implements Serializable { Path p = fileStatus.getPath(); String p_string = p.toString(); if (!p_string.endsWith("_SUCCESS")) { - // String tmp = p_string.substring(0, p_string.lastIndexOf("/")); String name = p_string.substring(p_string.lastIndexOf("/") + 1); log.info("Sending information for community: " + name); if (communityMap.containsKey(name.substring(0, name.lastIndexOf(".")))) { @@ -102,9 +101,9 @@ public class SendToZenodoHDFS implements Serializable { zenodoApiClient.sendMretadata(metadata); } - if (publish) + if (publish) { zenodoApiClient.publish(); - + } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java index 984e8b128..8e75e9d92 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/Utils.java @@ -25,6 +25,9 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class Utils { public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private Utils() { + } + public static void removeOutputDir(SparkSession spark, String path) { HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); } @@ -57,7 +60,7 @@ public class Utils { public static CommunityMap readCommunityMap(FileSystem fileSystem, String communityMapPath) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(communityMapPath)))); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); try { String line; while ((line = br.readLine()) != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index 55f075e95..a9c0770a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -57,27 +57,22 @@ public class CommunitySplit implements Serializable { Dataset community_products = result .filter((FilterFunction) r -> containsCommunity(r, c)); - try { - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); - } catch (Exception e) { - - } - + community_products.first(); + community_products + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + c); } private static boolean containsCommunity(CommunityResult r, String c) { if (Optional.ofNullable(r.getContext()).isPresent()) { - return r + return !r .getContext() .stream() .filter(con -> con.getCode().equals(c)) .collect(Collectors.toList()) - .size() > 0; + .isEmpty(); } return false; } diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java index 63970d14b..7ab8a7540 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkDumpCommunityProducts.java @@ -2,9 +2,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import java.io.Serializable; -import java.util.*; - -import javax.swing.text.html.Option; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; @@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.DumpProducts; -import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.oaf.Result; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java index 2d43888b4..0f2b4c2ed 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkPrepareResultProject.java @@ -22,6 +22,7 @@ import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance; import eu.dnetlib.dhp.schema.dump.oaf.community.Funder; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; @@ -127,11 +129,11 @@ public class SparkPrepareResultProject implements Serializable { op.getCode().getValue(), Optional .ofNullable(op.getAcronym()) - .map(a -> a.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getTitle()) - .map(v -> v.getValue()) + .map(Field::getValue) .orElse(null), Optional .ofNullable(op.getFundingtree()) @@ -140,7 +142,7 @@ public class SparkPrepareResultProject implements Serializable { .stream() .map(ft -> getFunder(ft.getValue())) .collect(Collectors.toList()); - if (tmp.size() > 0) { + if (!tmp.isEmpty()) { return tmp.get(0); } else { return null; @@ -161,30 +163,23 @@ public class SparkPrepareResultProject implements Serializable { } private static Funder getFunder(String fundingtree) { - // ["nsf_________::NSFNSFNational Science - // FoundationUSnsf_________::NSF::CISE/OAD::CISE/CCFDivision - // of Computing and Communication FoundationsDivision of Computing and Communication - // Foundationsnsf_________::NSF::CISE/OADDirectorate for - // Computer & Information Science & EngineeringDirectorate for Computer & - // Information Science & - // Engineeringnsf:fundingStream"] - Funder f = new Funder(); + final Funder f = new Funder(); final Document doc; try { - doc = new SAXReader().read(new StringReader(fundingtree)); + final SAXReader reader = new SAXReader(); + 
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(fundingtree)); f.setShortName(((Node) (doc.selectNodes("//funder/shortname").get(0))).getText()); f.setName(((Node) (doc.selectNodes("//funder/name").get(0))).getText()); f.setJurisdiction(((Node) (doc.selectNodes("//funder/jurisdiction").get(0))).getText()); for (Object o : doc.selectNodes("//funding_level_0")) { List node = ((Node) o).selectNodes("./name"); f.setFundingStream(((Node) node.get(0)).getText()); - } return f; - } catch (DocumentException e) { - e.printStackTrace(); + } catch (DocumentException | SAXException e) { + throw new IllegalArgumentException(e); } - return f; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java index 2b80b1d86..39ae32053 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/SparkUpdateProjectInfo.java @@ -60,7 +60,7 @@ public class SparkUpdateProjectInfo implements Serializable { isSparkSessionManaged, spark -> { Utils.removeOutputDir(spark, outputPath); - extend(spark, inputPath, outputPath, preparedInfoPath);// , inputClazz); + extend(spark, inputPath, outputPath, preparedInfoPath); }); } @@ -77,9 +77,7 @@ public class SparkUpdateProjectInfo implements Serializable { "left") .map((MapFunction, CommunityResult>) value -> { CommunityResult r = value._1(); - Optional.ofNullable(value._2()).ifPresent(rp -> { - r.setProjects(rp.getProjectsList()); - }); + Optional.ofNullable(value._2()).ifPresent(rp -> r.setProjects(rp.getProjectsList())); return r; }, Encoders.bean(CommunityResult.class)) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java index eb546624e..57708a78d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Constants.java @@ -23,5 +23,4 @@ public class Constants implements Serializable { public static final String CONTEXT_NS_PREFIX = "context_____"; public static final String UNKNOWN = "UNKNOWN"; - // public static final String FUNDER_DS = "entityregistry::projects"; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java index ccb84c713..120de9327 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextEntities.java @@ -1,12 +1,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; -import java.io.*; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.function.Consumer; import java.util.function.Function; -import org.apache.commons.crypto.utils.IoUtils; import org.apache.commons.io.IOUtils; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -14,15 +16,13 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.io.compress.CompressionOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; - import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; /** * Writes on HDFS Context entities. It queries the Information System at the lookup url provided as parameter and @@ -33,8 +33,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.ResearchInitiative; public class CreateContextEntities implements Serializable { private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; + private final transient Configuration conf; + private final transient BufferedWriter writer; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils @@ -88,7 +88,7 @@ public class CreateContextEntities implements Serializable { } public void execute(final Function producer, String isLookUpUrl) - throws Exception { + throws ISLookUpException { QueryInformationSystem queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(Utils.getIsLookUpService(isLookUpUrl)); @@ -101,10 +101,9 @@ public class CreateContextEntities implements Serializable { protected void writeEntity(final R r) { try { writer.write(Utils.OBJECT_MAPPER.writeValueAsString(r)); - // log.info("writing context : {}", new Gson().toJson(r)); writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java index 102406315..10f2014d0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateContextRelation.java @@ -7,6 +7,7 @@ import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.function.Consumer; import java.util.function.Function; @@ -31,20 +32,21 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; * and the project is not created because of a low coverage in the profiles of openaire ids related to projects */ public class CreateContextRelation implements Serializable { - private static final Logger log = LoggerFactory.getLogger(CreateContextEntities.class); - private final Configuration conf; - private final BufferedWriter writer; - private final QueryInformationSystem queryInformationSystem; + private static final Logger log = LoggerFactory.getLogger(CreateContextRelation.class); + private final transient Configuration conf; + private final transient BufferedWriter writer; + private final transient 
QueryInformationSystem queryInformationSystem; private static final String CONTEX_RELATION_DATASOURCE = "contentproviders"; - private static final String CONTEX_RELATION_PROJECT = "projects"; public static void main(String[] args) throws Exception { String jsonConfiguration = IOUtils .toString( - CreateContextRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json")); + Objects + .requireNonNull( + CreateContextRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/dump/complete/input_entity_parameter.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -70,10 +72,6 @@ public class CreateContextRelation implements Serializable { cce.execute(Process::getRelation, CONTEX_RELATION_DATASOURCE, ModelSupport.getIdPrefix(Datasource.class)); log.info("Creating relations for projects... "); -// cce -// .execute( -// Process::getRelation, CONTEX_RELATION_PROJECT, -// ModelSupport.getIdPrefix(eu.dnetlib.dhp.schema.oaf.Project.class)); cce.close(); @@ -107,7 +105,7 @@ public class CreateContextRelation implements Serializable { public void execute(final Function> producer, String category, String prefix) { - final Consumer consumer = ci -> producer.apply(ci).forEach(c -> writeEntity(c)); + final Consumer consumer = ci -> producer.apply(ci).forEach(this::writeEntity); queryInformationSystem.getContextRelation(consumer, category, prefix); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java index 31d105b66..6b9f13277 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/Process.java @@ -6,8 +6,6 @@ import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; @@ -21,8 +19,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.*; * context entity and datasource/projects related to the context. 
*/ public class Process implements Serializable { - private static final Logger log = LoggerFactory.getLogger(Process.class); + @SuppressWarnings("unchecked") public static R getEntity(ContextInfo ci) { try { ResearchInitiative ri; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java index c33a693a5..0ed5de67c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystem.java @@ -11,6 +11,7 @@ import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.jetbrains.annotations.NotNull; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -84,8 +85,9 @@ public class QueryInformationSystem { final Document doc; try { - - doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + doc = reader.read(new StringReader(xml)); Element root = doc.getRootElement(); cinfo.setId(root.attributeValue("id")); @@ -102,7 +104,7 @@ public class QueryInformationSystem { } consumer.accept(cinfo); - } catch (DocumentException e) { + } catch (DocumentException | SAXException e) { e.printStackTrace(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java index 868fa89fe..4365e861f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/complete/SparkOrganizationRelation.java @@ -61,7 +61,7 @@ public class SparkOrganizationRelation implements Serializable { log.info("organization map : {}", new Gson().toJson(organizationMap)); final String communityMapPath = parser.get("communityMapPath"); - log.info("communityMapPath: {} ", communityMapPath); + log.info("communityMapPath: {}", communityMapPath); SparkConf conf = new SparkConf(); @@ -117,15 +117,12 @@ public class SparkOrganizationRelation implements Serializable { } })); - // if (relList.size() > 0) { spark .createDataset(relList, Encoders.bean(eu.dnetlib.dhp.schema.dump.oaf.graph.Relation.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - // } - } @NotNull diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java index 00f604b14..d8a1b5c21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkDumpFunderResults.java @@ -4,7 +4,9 @@ package eu.dnetlib.dhp.oa.graph.dump.funderresults; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; -import java.util.*; +import java.util.List; 
+import java.util.Objects; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -14,16 +16,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.api.zenodo.Community; -import eu.dnetlib.dhp.oa.graph.dump.Constants; -import eu.dnetlib.dhp.oa.graph.dump.ResultMapper; import eu.dnetlib.dhp.oa.graph.dump.Utils; -import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.dump.oaf.community.CommunityResult; import eu.dnetlib.dhp.schema.dump.oaf.community.Project; import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; /** * Splits the dumped results by funder and stores them in a folder named as the funder nsp (for all the funders, but the EC diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java index 1a28a21f4..2d2b04b5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/funderresults/SparkResultLinkedToProject.java @@ -18,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.graph.dump.Constants; import eu.dnetlib.dhp.oa.graph.dump.Utils; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -62,6 +61,7 @@ public class SparkResultLinkedToProject implements Serializable { final String relationPath = parser.get("relationPath"); log.info("relationPath: {}", relationPath); + @SuppressWarnings("unchecked") Class inputClazz = (Class) Class.forName(resultClassName); SparkConf conf = new SparkConf(); @@ -95,9 +95,9 @@ public class SparkResultLinkedToProject implements Serializable { ._2() .getId(), Encoders.STRING()) - .mapGroups((MapGroupsFunction, R>) (k, it) -> { - return it.next()._2(); - }, Encoders.bean(inputClazz)) + .mapGroups( + (MapGroupsFunction, R>) (k, it) -> it.next()._2(), + Encoders.bean(inputClazz)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java index 59bdb3914..f87c0eb7a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/DatasourceCompatibilityComparator.java @@ -74,24 +74,4 @@ public class DatasourceCompatibilityComparator implements Comparator return lClass.compareTo(rClass); } - /* - * CASE WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY - * ['openaire-cris_1.1']) THEN 'openaire-cris_1.1@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT - * COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['openaire4.0']) THEN - * 'openaire4.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility):: TEXT) @> 
ARRAY ['driver', 'openaire2.0']) THEN - * 'driver-openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver']) THEN - * 'driver@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0']) THEN 'openaire2.0@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0']) THEN - * 'openaire3.0@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data']) THEN - * 'openaire2.0_data@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE - * (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native']) THEN - * 'native@@@dnet:datasourceCompatibilityLevel' WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, - * a.compatibility) :: TEXT) @> ARRAY ['hostedBy']) THEN 'hostedBy@@@dnet:datasourceCompatibilityLevel' WHEN - * (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible']) - * THEN 'notCompatible@@@dnet:datasourceCompatibilityLevel' ELSE 'UNKNOWN@@@dnet:datasourceCompatibilityLevel' END - */ } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java index 602213e58..ef419a042 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJob.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import java.util.stream.Collectors; -import javax.xml.crypto.Data; - import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -16,7 +14,6 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; -import eu.dnetlib.dhp.oa.graph.clean.CleanGraphSparkJob; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; @@ -55,9 +51,11 @@ public class MergeGraphTableSparkJob { String jsonConfiguration = IOUtils .toString( - CleanGraphSparkJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json")); + Objects + .requireNonNull( + MergeGraphTableSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/merge_graphs_parameters.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -133,7 +131,7 @@ public class MergeGraphTableSparkJob { HashSet collectedFromNames = Optional .ofNullable(o.getCollectedfrom()) .map(c -> c.stream().map(KeyValue::getValue).collect(Collectors.toCollection(HashSet::new))) - .orElse(new HashSet()); + .orElse(new 
HashSet<>()); return !collectedFromNames.contains("Datacite"); }) .write() diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 03c3eeb3c..9aa4e4c31 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -32,8 +32,6 @@ import org.dom4j.Document; import org.dom4j.DocumentFactory; import org.dom4j.DocumentHelper; import org.dom4j.Node; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -88,8 +86,6 @@ public abstract class AbstractMdRecordToOafMapper { protected static final Map nsContext = new HashMap<>(); - private static final Logger log = LoggerFactory.getLogger(DispatchEntitiesApplication.class); - static { nsContext.put("dr", "http://www.driver-repository.eu/namespace/dr"); nsContext.put("dri", "http://www.driver-repository.eu/namespace/dri"); @@ -117,9 +113,6 @@ public abstract class AbstractMdRecordToOafMapper { } public List processMdRecord(final String xml) { - - // log.info("Processing record: " + xml); - try { DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); @@ -134,7 +127,7 @@ public abstract class AbstractMdRecordToOafMapper { doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); if (collectedFrom == null) { - return null; + return Lists.newArrayList(); } final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) @@ -142,7 +135,7 @@ public abstract class AbstractMdRecordToOafMapper { : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); if (hostedBy == null) { - return null; + return Lists.newArrayList(); } final DataInfo info = prepareDataInfo(doc, invisible); @@ -161,21 +154,17 @@ public abstract class AbstractMdRecordToOafMapper { protected String getResultType(final Document doc, final List instances) { final String type = doc.valueOf("//dr:CobjCategory/@type"); - if (StringUtils.isBlank(type) & vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { + if (StringUtils.isBlank(type) && vocs.vocabularyExists(ModelConstants.DNET_RESULT_TYPOLOGIES)) { final String instanceType = instances .stream() .map(i -> i.getInstancetype().getClassid()) .findFirst() - .map(s -> UNKNOWN.equalsIgnoreCase(s) ? 
"0000" : s) + .filter(s -> !UNKNOWN.equalsIgnoreCase(s)) .orElse("0000"); // Unknown return Optional .ofNullable(vocs.getSynonymAsQualifier(ModelConstants.DNET_RESULT_TYPOLOGIES, instanceType)) - .map(q -> q.getClassid()) + .map(Qualifier::getClassid) .orElse("0000"); - /* - * .orElseThrow( () -> new IllegalArgumentException( String.format("'%s' not mapped in %s", instanceType, - * DNET_RESULT_TYPOLOGIES))); - */ } return type; @@ -185,7 +174,7 @@ public abstract class AbstractMdRecordToOafMapper { final String dsId = doc.valueOf(xpathId); final String dsName = doc.valueOf(xpathName); - if (StringUtils.isBlank(dsId) | StringUtils.isBlank(dsName)) { + if (StringUtils.isBlank(dsId) || StringUtils.isBlank(dsName)) { return null; } @@ -498,7 +487,6 @@ public abstract class AbstractMdRecordToOafMapper { accessRight.setSchemename(qualifier.getSchemename()); // TODO set the OAStatus - // accessRight.setOaStatus(...); return accessRight; } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index bbfb7429f..9027b49d7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -127,7 +127,7 @@ public class GenerateEntitiesApplication { .map(k -> new Tuple2<>(k._1().toString(), k._2().toString())) .map(k -> convertToListOaf(k._1(), k._2(), shouldHashId, vocs)) .filter(Objects::nonNull) - .flatMap(list -> list.iterator())); + .flatMap(List::iterator)); } switch (mode) { @@ -135,7 +135,7 @@ public class GenerateEntitiesApplication { save( inputRdd .mapToPair(oaf -> new Tuple2<>(ModelSupport.idFn().apply(oaf), oaf)) - .reduceByKey((o1, o2) -> OafMapperUtils.merge(o1, o2)) + .reduceByKey(OafMapperUtils::merge) .map(Tuple2::_2), targetPath); break; @@ -191,7 +191,7 @@ public class GenerateEntitiesApplication { case "otherresearchproduct": return Arrays.asList(convertFromJson(s, OtherResearchProduct.class)); default: - throw new RuntimeException("type not managed: " + type.toLowerCase()); + throw new IllegalArgumentException("type not managed: " + type.toLowerCase()); } } @@ -199,9 +199,9 @@ public class GenerateEntitiesApplication { try { return OBJECT_MAPPER.readValue(s, clazz); } catch (final Exception e) { - log.error("Error parsing object of class: " + clazz); + log.error("Error parsing object of class: {}", clazz); log.error(s); - throw new RuntimeException(e); + throw new IllegalArgumentException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java index d5c310c1b..ee1b6a5da 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MergeClaimsApplication.java @@ -40,9 +40,11 @@ public class MergeClaimsApplication { final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateMongoMdstoresApplication.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json"))); + Objects + .requireNonNull( + MergeClaimsApplication.class + .getResourceAsStream( + 
"/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")))); parser.parseArgument(args); Boolean isSparkSessionManaged = Optional diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java index a9d3e05fe..b9033702d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplication.java @@ -9,10 +9,7 @@ import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; @@ -72,8 +69,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i final ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - MigrateDbEntitiesApplication.class - .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json"))); + Objects + .requireNonNull( + MigrateDbEntitiesApplication.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_db_entities_parameters.json")))); parser.parseArgument(args); @@ -87,7 +86,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i log.info("postgresPassword: xxx"); final String dbSchema = parser.get("dbschema"); - log.info("dbSchema {}: " + dbSchema); + log.info("dbSchema {}: ", dbSchema); final String isLookupUrl = parser.get("isLookupUrl"); log.info("isLookupUrl: {}", isLookupUrl); @@ -659,18 +658,6 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i r1.setDataInfo(info); r1.setLastupdatetimestamp(lastUpdateTimestamp); - // removed because there's no difference between two sides //TODO -// final Relation r2 = new Relation(); -// r2.setRelType(ORG_ORG_RELTYPE); -// r2.setSubRelType(ORG_ORG_SUBRELTYPE); -// r2.setRelClass(relClass); -// r2.setSource(orgId2); -// r2.setTarget(orgId1); -// r2.setCollectedfrom(collectedFrom); -// r2.setDataInfo(info); -// r2.setLastupdatetimestamp(lastUpdateTimestamp); -// return Arrays.asList(r1, r2); - return Arrays.asList(r1); } catch (final Exception e) { throw new RuntimeException(e); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java index 1d4eca2c2..4110bd806 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateHdfsMdstoresApplication.java @@ -3,6 +3,7 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.io.IOException; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -82,11 +83,11 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication public static void processPaths(final SparkSession spark, final String outputPath, final Set paths, - final String type) throws Exception { + final String type) { final JavaSparkContext sc = 
JavaSparkContext.fromSparkContext(spark.sparkContext()); - log.info("Found " + paths.size() + " not empty mdstores"); + log.info("Found {} not empty mdstores", paths.size()); paths.forEach(log::info); final String[] validPaths = paths @@ -98,7 +99,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication spark .read() .parquet(validPaths) - .map((MapFunction) r -> enrichRecord(r), Encoders.STRING()) + .map((MapFunction) MigrateHdfsMdstoresApplication::enrichRecord, Encoders.STRING()) .toJavaRDD() .mapToPair(xml -> new Tuple2<>(new Text(UUID.randomUUID() + ":" + type), new Text(xml))) // .coalesce(1) @@ -120,7 +121,9 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication final String tranDate = dateFormat.format(new Date((Long) r.getAs("dateOfTransformation"))); try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); final Element head = (Element) doc.selectSingleNode("//*[local-name() = 'header']"); head.addElement(new QName("objIdentifier", DRI_NS_PREFIX)).addText(r.getAs("id")); head.addElement(new QName("dateOfCollection", DRI_NS_PREFIX)).addText(collDate); @@ -135,8 +138,7 @@ public class MigrateHdfsMdstoresApplication extends AbstractMigrationApplication private static Set mdstorePaths(final String mdstoreManagerUrl, final String format, final String layout, - final String interpretation) - throws Exception { + final String interpretation) throws IOException { final String url = mdstoreManagerUrl + "/mdstores/"; final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java index 3f6afbeac..6dbab96cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplication.java @@ -51,10 +51,10 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio public void execute(final String format, final String layout, final String interpretation) { final Map colls = mdstoreClient.validCollections(format, layout, interpretation); - log.info("Found " + colls.size() + " mdstores"); + log.info("Found {} mdstores", colls.size()); for (final Entry entry : colls.entrySet()) { - log.info("Processing mdstore " + entry.getKey() + " (collection: " + entry.getValue() + ")"); + log.info("Processing mdstore {} (collection: {})", entry.getKey(), entry.getValue()); final String currentColl = entry.getValue(); for (final String xml : mdstoreClient.listRecords(currentColl)) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index d753cddeb..2b49a9dc1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -19,7 +19,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.common.PacePerson; import 
eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -56,8 +55,8 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); author.setPid(new ArrayList<>()); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 7925a7826..02aab4f16 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -13,7 +13,6 @@ import org.dom4j.Node; import eu.dnetlib.dhp.common.PacePerson; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -88,11 +87,11 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { .valueOf("./@nameIdentifierScheme") .trim() .toUpperCase() - .replaceAll(" ", "") - .replaceAll("_", ""); + .replace(" ", "") + .replace("_", ""); if (type.toLowerCase().startsWith(ORCID)) { - final String cleanedId = id.replaceAll("http://orcid.org/", "").replaceAll("https://orcid.org/", ""); + final String cleanedId = id.replace("http://orcid.org/", "").replace("https://orcid.org/", ""); res.add(structuredProperty(cleanedId, ORCID_PID_TYPE, info)); } else if (type.startsWith("MAGID")) { res.add(structuredProperty(id, MAG_PID_TYPE, info)); @@ -388,7 +387,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { @Override protected List prepareResultPids(final Document doc, final DataInfo info) { - final Set res = new HashSet(); + final Set res = new HashSet<>(); res .addAll( prepareListStructPropsWithValidQualifier( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java index 5523863ff..edfd65299 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationsApplication.java @@ -4,12 +4,10 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java index a0ce4f5a6..5d32fe926 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/common/AbstractMigrationApplication.java @@ -12,6 +12,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Oaf; @@ -34,7 +35,7 @@ public class AbstractMigrationApplication implements Closeable { this.writer = null; } - public AbstractMigrationApplication(final String hdfsPath) throws Exception { + public AbstractMigrationApplication(final String hdfsPath) throws IOException { log.info(String.format("Creating SequenceFile Writer, hdfsPath=%s", hdfsPath)); @@ -46,15 +47,14 @@ public class AbstractMigrationApplication implements Closeable { SequenceFile.Writer.valueClass(Text.class)); } - private Configuration getConf() throws IOException { - final Configuration conf = new Configuration(); + private Configuration getConf() { + return new Configuration(); /* * conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.hdfs.impl", * org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", * org.apache.hadoop.fs.LocalFileSystem.class.getName()); System.setProperty("HADOOP_USER_NAME", hdfsUser); * System.setProperty("hadoop.home.dir", "/"); FileSystem.get(URI.create(hdfsNameNode), conf); */ - return conf; } protected void emit(final String s, final String type) { @@ -62,16 +62,16 @@ public class AbstractMigrationApplication implements Closeable { key.set(counter.getAndIncrement() + ":" + type); value.set(s); writer.append(key, value); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (final IOException e) { + throw new IllegalStateException(e); } } protected void emitOaf(final Oaf oaf) { try { emit(objectMapper.writeValueAsString(oaf), oaf.getClass().getSimpleName().toLowerCase()); - } catch (final Exception e) { - throw new RuntimeException(e); + } catch (JsonProcessingException e) { + throw new IllegalStateException(e); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java index 32f6e7abc..afaac04ea 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/GraphHiveImporterJobTest.java @@ -68,7 +68,7 @@ public class GraphHiveImporterJobTest { } @Test - public void testImportGraphAsHiveDB() throws Exception { + void testImportGraphAsHiveDB() throws Exception { GraphHiveImporterJob .main( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index b196d1948..edcd72ab4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -50,7 +50,7 @@ public class GraphCleaningFunctionsTest { } @Test - public void testCleaning() throws Exception { + void testCleaning() throws Exception { assertNotNull(vocabularies); 
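The SAXReader hardening applied in MigrateHdfsMdstoresApplication earlier in this patch follows a common pattern for parsing untrusted XML: refusing DOCTYPE declarations prevents external entity definitions hidden in harvested records from being resolved (XXE). The following is a minimal, self-contained sketch of that pattern, assuming dom4j 2.x on the classpath; the class name, helper name and sample record are illustrative only and are not part of the patch.

import java.io.StringReader;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

public class SafeXmlParsing {

	// Parses an XML string with DOCTYPE declarations disabled, so entity
	// definitions carried by untrusted input cannot be expanded or resolved.
	public static Document parseUntrusted(final String xml) throws DocumentException, SAXException {
		final SAXReader reader = new SAXReader();
		reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
		return reader.read(new StringReader(xml));
	}

	public static void main(final String[] args) throws Exception {
		// A record carrying a DOCTYPE is rejected instead of being expanded.
		final String malicious = "<!DOCTYPE foo [<!ENTITY xxe SYSTEM \"file:///etc/passwd\">]><record>&xxe;</record>";
		try {
			parseUntrusted(malicious);
		} catch (final DocumentException e) {
			System.out.println("rejected: " + e.getMessage());
		}
	}
}

With the feature enabled, the read call fails as soon as the DOCTYPE is encountered, which is exactly the behaviour the enrichRecord change above relies on.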
assertNotNull(mapping); @@ -166,10 +166,6 @@ public class GraphCleaningFunctionsTest { // TODO add more assertions to verity the cleaned values System.out.println(MAPPER.writeValueAsString(p_cleaned)); - - /* - * assertTrue( p_out .getPid() .stream() .allMatch(sp -> StringUtils.isNotBlank(sp.getValue()))); - */ } private Stream getAuthorPidTypes(Result pub) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java index 803ae0416..697ec705f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/GenerateJsonSchema.java @@ -10,10 +10,10 @@ import com.github.victools.jsonschema.generator.*; import eu.dnetlib.dhp.schema.dump.oaf.graph.*; @Disabled -public class GenerateJsonSchema { +class GenerateJsonSchema { @Test - public void generateSchema() { + void generateSchema() { SchemaGeneratorConfigBuilder configBuilder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_7, OptionPreset.PLAIN_JSON) .with(Option.SCHEMA_VERSION_INDICATOR) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java index 51e4e1033..41b906e58 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/MakeTarTest.java @@ -22,7 +22,7 @@ public class MakeTarTest { } @Test - public void testTar() throws IOException { + void testTar() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); fs diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java index d5a9ba8dd..03eed7a45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/PrepareResultProjectJobTest.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.oa.graph.dump; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; import org.apache.commons.io.FileUtils; import org.apache.spark.SparkConf; @@ -15,7 +16,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.slf4j.Logger; @@ -37,8 +37,6 @@ public class PrepareResultProjectJobTest { private static final Logger log = LoggerFactory .getLogger(eu.dnetlib.dhp.oa.graph.dump.PrepareResultProjectJobTest.class); - private static final HashMap map = new HashMap<>(); - @BeforeAll public static void beforeAll() throws IOException { workingDir = Files @@ -69,7 +67,7 @@ public class PrepareResultProjectJobTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/no_match") 
@@ -90,12 +88,12 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertEquals(0, verificationDataset.count()); + assertEquals(0, verificationDataset.count()); } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match_one") @@ -116,12 +114,11 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 1); + assertEquals(1, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); verificationDataset.createOrReplaceTempView("table"); @@ -131,14 +128,14 @@ public class PrepareResultProjectJobTest { "from table " + "lateral view explode (projectsList) pl as projList"); - Assertions.assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals(1, check.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); verificationDataset.show(false); } @Test - public void testMatch() throws Exception { + void testMatch() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultProject/match") @@ -159,16 +156,14 @@ public class PrepareResultProjectJobTest { org.apache.spark.sql.Dataset verificationDataset = spark .createDataset(tmp.rdd(), Encoders.bean(ResultProject.class)); - Assertions.assertTrue(verificationDataset.count() == 2); + assertEquals(2, verificationDataset.count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); - Assertions - .assertEquals( - 1, - verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'").count()); + assertEquals( + 1, + verificationDataset.filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'").count()); verificationDataset.createOrReplaceTempView("dataset"); @@ -177,62 +172,54 @@ public class PrepareResultProjectJobTest { + "lateral view explode(projectsList) p as MyT "; org.apache.spark.sql.Dataset resultExplodedProvenance = spark.sql(query); - Assertions.assertEquals(3, resultExplodedProvenance.count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals(3, resultExplodedProvenance.count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 2, - resultExplodedProvenance - .filter("project = 
'40|aka_________::0f7d119de1f656b5763a16acf876fed6'") - .count()); + assertEquals( + 2, + resultExplodedProvenance + .filter("project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::0f7d119de1f656b5763a16acf876fed6' and resultId = '50|dedup_wf_001::51b88f272ba9c3bb181af64e70255a80'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter("project = '40|aka_________::03376222b28a3aebf2730ac514818d04'") + .count()); - Assertions - .assertEquals( - 1, - resultExplodedProvenance - .filter( - "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") - .count()); + assertEquals( + 1, + resultExplodedProvenance + .filter( + "project = '40|aka_________::03376222b28a3aebf2730ac514818d04' and resultId = '50|dedup_wf_001::e4805d005bfab0cd39a1642cbf477fdb'") + .count()); - Assertions - .assertEquals( - 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); + assertEquals( + 3, resultExplodedProvenance.filter("provenance = 'sysimport:crosswalk:entityregistry'").count()); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java index c6666342a..98902b618 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/QueryInformationSystemTest.java @@ -14,12 +14,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -66,7 +67,7 @@ public class QueryInformationSystemTest { private Map map; @BeforeEach - public void setUp() throws ISLookUpException, DocumentException { + public void setUp() throws ISLookUpException, DocumentException, SAXException { lenient().when(isLookUpService.quickSearchProfile(XQUERY)).thenReturn(communityMap); queryInformationSystem = new QueryInformationSystem(); queryInformationSystem.setIsLookUp(isLookUpService); @@ -74,13 +75,13 @@ public class 
QueryInformationSystemTest { } @Test - public void testSize() throws ISLookUpException { + void testSize() throws ISLookUpException { Assertions.assertEquals(23, map.size()); } @Test - public void testContent() { + void testContent() { Assertions.assertTrue(map.containsKey("egi") && map.get("egi").equals("EGI Federation")); Assertions.assertTrue(map.containsKey("fet-fp7") && map.get("fet-fp7").equals("FET FP7")); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 42ad5634a..2ea4bc91a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java index 20a46cee0..a164593ec 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/UpdateProjectInfoTest.java @@ -68,7 +68,7 @@ public class UpdateProjectInfoTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/addProjectInfo") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java index 05dc423cb..8d06758a8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/ZenodoUploadTest.java @@ -32,7 +32,7 @@ public class ZenodoUploadTest { } @Test - public void testNewDeposition() throws IOException { + void testNewDeposition() throws IOException { CommunityMap communityMap = new CommunityMap(); communityMap.put("ni", "Neuroinformatics"); communityMap.put("dh-ch", "Digital Humanities and Cultural Heritage"); @@ -86,7 +86,7 @@ public class ZenodoUploadTest { } @Test - public void testNewVersion() throws IOException, MissingConceptDoiException { + void testNewVersion() throws IOException, MissingConceptDoiException { ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ACCESS_TOKEN); @@ -137,7 +137,7 @@ public class ZenodoUploadTest { } @Test - public void readCommunityMap() throws IOException { + void readCommunityMap() throws IOException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); System.out .println( diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index 3ecbd1894..f881e6b30 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -86,7 +86,7 @@ public class CreateEntityTest { } @Test - public void test1() throws ISLookUpException, IOException { + void test1() throws ISLookUpException, IOException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); queryInformationSystem.getContextInformation(consumer); @@ -144,7 +144,7 @@ public class CreateEntityTest { @Test @Disabled - public void test2() throws IOException, ISLookUpException { + void test2() throws IOException, ISLookUpException { LocalFileSystem fs = FileSystem.getLocal(new Configuration()); Path hdfsWritePath = new Path(workingDir + "/prova"); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java index b556fa2d6..69e550d45 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateRelationTest.java @@ -16,7 +16,7 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.Relation; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.utils.DHPUtils; -public class CreateRelationTest { +class CreateRelationTest { List communityContext = Arrays .asList( @@ -473,7 +473,7 @@ public class CreateRelationTest { } @Test - public void test1() { + void test1() { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java index 3d42f124e..e43383ef4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/ExtractRelationFromEntityTest.java @@ -60,7 +60,7 @@ public class ExtractRelationFromEntityTest { } @Test - public void test1() { + void test1() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/singelRecord_pub.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java index 75d5a2673..eb5919fd5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/FunderParsingTest.java @@ -4,13 +4,14 @@ package eu.dnetlib.dhp.oa.graph.dump.complete; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder; -public class FunderParsingTest { +class FunderParsingTest { @Test - public void testFunderTwoLevels() throws DocumentException { + void testFunderTwoLevels() throws DocumentException { String funding_Stream = "nsf_________::NSFNSFNational Science " + @@ -37,7 +38,7 @@ public class FunderParsingTest { } @Test - public void testFunderThreeeLevels() throws DocumentException { + 
void testFunderThreeeLevels() throws DocumentException, SAXException { String funding_stream = "ec__________::EC" + "EC" + "European Commission" + diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java index d769aa138..049959704 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/QueryInformationSystemTest.java @@ -17,7 +17,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class QueryInformationSystemTest { +class QueryInformationSystemTest { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " + @@ -513,7 +513,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeEntity() throws ISLookUpException { + void testSizeEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -523,7 +523,7 @@ public class QueryInformationSystemTest { } @Test - public void testSizeRelation() throws ISLookUpException { + void testSizeRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -534,7 +534,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentRelation() throws ISLookUpException { + void testContentRelation() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); @@ -572,7 +572,7 @@ public class QueryInformationSystemTest { } @Test - public void testContentEntity() throws ISLookUpException { + void testContentEntity() throws ISLookUpException { List cInfoList = new ArrayList<>(); final Consumer consumer = ci -> cInfoList.add(ci); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java index ea2dc73ca..50a9f26b4 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/RelationFromOrganizationTest.java @@ -69,7 +69,7 @@ public class RelationFromOrganizationTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/relation") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java index 6c5ebbab3..d1e3b3acc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/ResultLinkedToProjectTest.java @@ -69,7 +69,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testNoMatch() throws Exception { + void testNoMatch() 
throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/nomatch/papers.json") @@ -102,7 +102,7 @@ public class ResultLinkedToProjectTest { } @Test - public void testMatchOne() throws Exception { + void testMatchOne() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/match/papers.json") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java index 71bf5d942..8ac0c552f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/funderresult/SplitPerFunderTest.java @@ -65,7 +65,7 @@ public class SplitPerFunderTest { } @Test - public void test1() throws Exception { + void test1() throws Exception { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/funderresource/extendeddump") diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java index 0089811cf..2d28ee305 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/merge/MergeGraphTableSparkJobTest.java @@ -15,7 +15,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.schema.oaf.Datasource; -public class MergeGraphTableSparkJobTest { +class MergeGraphTableSparkJobTest { private ObjectMapper mapper; @@ -25,7 +25,7 @@ public class MergeGraphTableSparkJobTest { } @Test - public void testMergeDatasources() throws IOException { + void testMergeDatasources() throws IOException { assertEquals( "openaire-cris_1.1", MergeGraphTableSparkJob diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index e0d202209..1e974dd69 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -24,7 +24,7 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class GenerateEntitiesApplicationTest { +class GenerateEntitiesApplicationTest { @Mock private ISLookUpService isLookUpService; @@ -44,7 +44,7 @@ public class GenerateEntitiesApplicationTest { } @Test - public void testMergeResult() throws IOException { + void testMergeResult() throws IOException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index c431b4dd8..000dbfe25 100644 --- 
a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -8,6 +8,7 @@ import static org.mockito.Mockito.lenient; import java.io.IOException; import java.util.List; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -21,7 +22,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; -import eu.dnetlib.dhp.oa.graph.clean.GraphCleaningFunctionsTest; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) -public class MappersTest { +class MappersTest { @Mock private ISLookUpService isLookUpService; @@ -50,7 +50,7 @@ public class MappersTest { @Test void testPublication() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -71,7 +71,7 @@ public class MappersTest { assertValidId(p.getCollectedfrom().get(0).getKey()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); assertFalse(p.getDataInfo().getInvisible()); - assertTrue(p.getSource().size() == 1); + assertEquals(1, p.getSource().size()); assertTrue(StringUtils.isNotBlank(p.getDateofcollection())); assertTrue(StringUtils.isNotBlank(p.getDateoftransformation())); @@ -88,7 +88,7 @@ public class MappersTest { .getPid() .stream() .findFirst() - .get(); + .orElseThrow(() -> new IllegalStateException("missing author pid")); assertEquals("0000-0001-6651-1178", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -108,7 +108,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -141,17 +140,15 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); - // System.out.println(new ObjectMapper().writeValueAsString(p)); - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testPublication_PubMed() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record_pubmed.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -196,23 +193,22 @@ public class MappersTest { assertTrue(p.getSubject().size() > 0); assertTrue(p.getPid().size() > 0); - assertEquals(p.getPid().get(0).getValue(), "PMC1517292"); - 
assertEquals(p.getPid().get(0).getQualifier().getClassid(), "pmc"); + assertEquals("PMC1517292", p.getPid().get(0).getValue()); + assertEquals("pmc", p.getPid().get(0).getQualifier().getClassid()); assertNotNull(p.getInstance()); assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); }); assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); assertNotNull(p.getInstance().get(0).getPid()); - assertTrue(p.getInstance().get(0).getPid().size() == 2); + assertEquals(2, p.getInstance().get(0).getPid().size()); - assertTrue(p.getInstance().get(0).getAlternateIdentifier().size() == 1); + assertEquals(1, p.getInstance().get(0).getAlternateIdentifier().size()); assertEquals("doi", p.getInstance().get(0).getAlternateIdentifier().get(0).getQualifier().getClassid()); assertEquals("10.3897/oneeco.2.e13718", p.getInstance().get(0).getAlternateIdentifier().get(0).getValue()); @@ -223,7 +219,7 @@ public class MappersTest { @Test void testPublicationInvisible() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); final List list = new OafToOafMapper(vocs, true, true).processMdRecord(xml); @@ -238,7 +234,7 @@ public class MappersTest { @Test void testDataset() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -281,12 +277,13 @@ public class MappersTest { .filter(a -> a.getPid() != null && !a.getPid().isEmpty()) .findFirst(); assertTrue(author.isPresent()); - final StructuredProperty pid = author + final Optional oPid = author .get() .getPid() .stream() - .findFirst() - .get(); + .findFirst(); + assertTrue(oPid.isPresent()); + final StructuredProperty pid = oPid.get(); assertEquals("0000-0001-9074-1619", pid.getValue()); assertEquals(ModelConstants.ORCID_PENDING, pid.getQualifier().getClassid()); assertEquals(ModelConstants.ORCID_CLASSNAME, pid.getQualifier().getClassname()); @@ -315,7 +312,6 @@ public class MappersTest { assertTrue(d.getInstance().size() > 0); d .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -345,13 +341,14 @@ public class MappersTest { assertTrue(StringUtils.isNotBlank(r2.getRelType())); assertTrue(r1.getValidated()); assertTrue(r2.getValidated()); - assertEquals(r1.getValidationDate(), "2020-01-01"); - assertEquals(r2.getValidationDate(), "2020-01-01"); + assertEquals("2020-01-01", r1.getValidationDate()); + assertEquals("2020-01-01", r2.getValidationDate()); } @Test void testOdfBielefeld() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_bielefeld.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -389,7 +386,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("OPEN", i.getAccessright().getClassid()); @@ -399,7 +395,8 @@ public class 
MappersTest { @Test void testOpentrial() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_opentrial.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -444,7 +441,7 @@ public class MappersTest { assertEquals(1, d.getDescription().size()); assertTrue(StringUtils.isNotBlank(d.getDescription().get(0).getValue())); - assertTrue(d.getAuthor().size() == 1); + assertEquals(1, d.getAuthor().size()); assertEquals("Jensen, Kristian K", d.getAuthor().get(0).getFullname()); assertEquals("Kristian K.", d.getAuthor().get(0).getName()); assertEquals("Jensen", d.getAuthor().get(0).getSurname()); @@ -462,7 +459,7 @@ public class MappersTest { assertTrue(d.getContext().isEmpty()); assertNotNull(d.getInstance()); - assertTrue(d.getInstance().size() == 1); + assertEquals(1, d.getInstance().size()); final Instance i = d.getInstance().get(0); @@ -515,7 +512,7 @@ public class MappersTest { @Test void testSoftware() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -532,22 +529,15 @@ public class MappersTest { assertTrue(s.getInstance().size() > 0); } - // @Test - void testDataset_2() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset_2.xml")); - - final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); - - System.out.println("***************"); - System.out.println(new ObjectMapper().writeValueAsString(list)); - System.out.println("***************"); - } - @Test void testClaimDedup() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_dedup.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + assertNotNull(list); + assertFalse(list.isEmpty()); + System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); System.out.println("***************"); @@ -555,7 +545,7 @@ public class MappersTest { @Test void testNakala() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_nakala.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -583,7 +573,7 @@ public class MappersTest { @Test void testEnermaps() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("enermaps.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -608,7 +598,8 @@ public class MappersTest { @Test void testClaimFromCrossref() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_claim_crossref.xml")); + final String xml = IOUtils + 
.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -624,7 +615,7 @@ public class MappersTest { @Test void testODFRecord() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_record.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); System.out.println(new ObjectMapper().writeValueAsString(list)); @@ -638,7 +629,7 @@ public class MappersTest { @Test void testTextGrid() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("textgrid.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -652,9 +643,9 @@ public class MappersTest { assertEquals(1, p.getAuthor().size()); assertEquals("OPEN", p.getBestaccessright().getClassid()); - assertTrue(p.getPid().size() == 1); + assertEquals(1, p.getPid().size()); assertTrue(PidType.isValid(p.getPid().get(0).getQualifier().getClassid())); - assertTrue(PidType.handle.equals(PidType.valueOf(p.getPid().get(0).getQualifier().getClassid()))); + assertEquals(PidType.handle, PidType.valueOf(p.getPid().get(0).getQualifier().getClassid())); assertEquals("hdl:11858/00-1734-0000-0003-EE73-2", p.getPid().get(0).getValue()); assertEquals("dataset", p.getResulttype().getClassname()); assertEquals(1, p.getInstance().size()); @@ -672,7 +663,7 @@ public class MappersTest { @Test void testBologna() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf-bologna.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -689,7 +680,7 @@ public class MappersTest { @Test void testJairo() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_jairo.xml")); + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -702,7 +693,7 @@ public class MappersTest { assertNotNull(p.getTitle()); assertFalse(p.getTitle().isEmpty()); - assertTrue(p.getTitle().size() == 1); + assertEquals(1, p.getTitle().size()); assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue())); final Publication p_cleaned = cleanup(fixVocabularyNames(p)); @@ -713,7 +704,8 @@ public class MappersTest { @Test void testOdfFromHdfs() throws IOException { - final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_from_hdfs.xml")); + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -749,7 +741,6 @@ public class MappersTest { assertTrue(p.getInstance().size() > 0); p .getInstance() - .stream() .forEach(i -> { assertNotNull(i.getAccessright()); assertEquals("UNKNOWN", i.getAccessright().getClassid()); @@ -768,13 +759,16 @@ 
public class MappersTest { private List vocs() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt")); + Objects + .requireNonNull(MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/terms.txt"))); } private List synonyms() throws IOException { return IOUtils .readLines( - GraphCleaningFunctionsTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt")); + Objects + .requireNonNull( + MappersTest.class.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt"))); } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index d529d2eb2..69943435a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -32,7 +32,7 @@ import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; @ExtendWith(MockitoExtension.class) -public class MigrateDbEntitiesApplicationTest { +class MigrateDbEntitiesApplicationTest { private MigrateDbEntitiesApplication app; @@ -58,7 +58,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasource() throws Exception { + void testProcessDatasource() throws Exception { final List fields = prepareMocks("datasources_resultset_entry.json"); final List list = app.processDatasource(rs); @@ -81,7 +81,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProject() throws Exception { + void testProcessProject() throws Exception { final List fields = prepareMocks("projects_resultset_entry.json"); final List list = app.processProject(rs); @@ -99,7 +99,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessOrganization() throws Exception { + void testProcessOrganization() throws Exception { final List fields = prepareMocks("organizations_resultset_entry.json"); final List list = app.processOrganization(rs); @@ -126,7 +126,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessDatasourceOrganization() throws Exception { + void testProcessDatasourceOrganization() throws Exception { final List fields = prepareMocks("datasourceorganization_resultset_entry.json"); final List list = app.processDatasourceOrganization(rs); @@ -143,7 +143,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessProjectOrganization() throws Exception { + void testProcessProjectOrganization() throws Exception { final List fields = prepareMocks("projectorganization_resultset_entry.json"); final List list = app.processProjectOrganization(rs); @@ -162,7 +162,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_context() throws Exception { + void testProcessClaims_context() throws Exception { final List fields = prepareMocks("claimscontext_resultset_entry.json"); final List list = app.processClaims(rs); @@ -177,7 +177,7 @@ public class MigrateDbEntitiesApplicationTest { } @Test - public void testProcessClaims_rels() throws Exception { + void testProcessClaims_rels() throws Exception { final List fields = prepareMocks("claimsrel_resultset_entry.json"); final List list = app.processClaims(rs); 
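The Objects.requireNonNull wrapping applied throughout MappersTest above trades a puzzling NullPointerException deep inside IOUtils.toString for an immediate, clearly worded failure at the call site whenever a test resource is missing or its path is misspelled. Below is a small sketch of the same idea as a reusable helper, assuming commons-io is available; the class name and resource path are hypothetical and only illustrate the call shape.

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.commons.io.IOUtils;

public class ClasspathResources {

	// Reads a classpath resource as UTF-8 text, failing fast with a clear
	// message when the resource cannot be found, instead of letting a null
	// stream surface later as an anonymous NullPointerException.
	public static String read(final Class<?> anchor, final String name) throws IOException {
		final InputStream is = Objects
			.requireNonNull(anchor.getResourceAsStream(name), "resource not found: " + name);
		return IOUtils.toString(is, StandardCharsets.UTF_8);
	}

	public static void main(final String[] args) throws IOException {
		try {
			// Hypothetical path, deliberately missing, to show the fail-fast message.
			read(ClasspathResources.class, "/eu/dnetlib/dhp/example/missing.xml");
		} catch (final NullPointerException e) {
			System.out.println(e.getMessage());
		}
	}
}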
@@ -208,9 +208,6 @@ public class MigrateDbEntitiesApplicationTest { assertValidId(r1.getCollectedfrom().get(0).getKey()); assertValidId(r2.getCollectedfrom().get(0).getKey()); - - // System.out.println(new ObjectMapper().writeValueAsString(r1)); - // System.out.println(new ObjectMapper().writeValueAsString(r2)); } private List prepareMocks(final String jsonFile) throws IOException, SQLException { @@ -273,7 +270,7 @@ public class MigrateDbEntitiesApplicationTest { final String[] values = ((List) tf.getValue()) .stream() .filter(Objects::nonNull) - .map(o -> o.toString()) + .map(Object::toString) .toArray(String[]::new); Mockito.when(arr.getArray()).thenReturn(values); @@ -334,6 +331,7 @@ public class MigrateDbEntitiesApplicationTest { return new Float(getValueAs(name, fields).toString()); } + @SuppressWarnings("unchecked") private T getValueAs(final String name, final List fields) { return fields .stream() diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java index fb2c90e5c..3b9616de3 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateMongoMdstoresApplicationTest.java @@ -26,8 +26,6 @@ import io.fares.junit.mongodb.MongoForAllExtension; @Disabled public class MigrateMongoMdstoresApplicationTest { - private static final Logger log = LoggerFactory.getLogger(MigrateMongoMdstoresApplicationTest.class); - public static final String COLL_NAME = "9eed8a4d-bb41-47c3-987f-9d06aee0dec0::1453898911558"; @RegisterExtension diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index 3fd365416..c8158c044 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -75,7 +75,7 @@ public class PatchRelationApplicationTest { } @Test - public void testPatchRelationApplication() throws Exception { + void testPatchRelationApplication() throws Exception { final String graphBasePath = workingDir.toString() + "/graphBasePath"; PatchRelationsApplication.main(new String[] { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java index 110fabf45..ec059ad73 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/reflections/ReflectionTest.java @@ -9,7 +9,7 @@ import java.util.List; import org.junit.jupiter.api.Test; -public class ReflectionTest { +class ReflectionTest { private final Cleaner cleaner = new Cleaner(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 7534ce4bd..3301577ee 100644 --- 
a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -46,9 +46,11 @@ public class CreateRelatedEntitiesJob_phase1 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase1.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase1.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -146,7 +148,11 @@ public class CreateRelatedEntitiesJob_phase1 { Result result = (Result) entity; if (result.getTitle() != null && !result.getTitle().isEmpty()) { - final StructuredProperty title = result.getTitle().stream().findFirst().get(); + final StructuredProperty title = result + .getTitle() + .stream() + .findFirst() + .orElseThrow(() -> new IllegalStateException("missing title in " + entity.getId())); title.setValue(StringUtils.left(title.getValue(), ModelHardLimits.MAX_TITLE_LENGTH)); re.setTitle(title); } @@ -196,7 +202,7 @@ public class CreateRelatedEntitiesJob_phase1 { List> f = p.getFundingtree(); if (!f.isEmpty()) { - re.setFundingtree(f.stream().map(s -> s.getValue()).collect(Collectors.toList())); + re.setFundingtree(f.stream().map(Field::getValue).collect(Collectors.toList())); } break; } @@ -211,7 +217,7 @@ public class CreateRelatedEntitiesJob_phase1 { return Optional .ofNullable(f) .filter(Objects::nonNull) - .map(x -> x.getValue()) + .map(Field::getValue) .orElse(defaultValue); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java index c013a2bf6..85fb4a6b2 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase2.java @@ -49,9 +49,11 @@ public class CreateRelatedEntitiesJob_phase2 { String jsonConfiguration = IOUtils .toString( - PrepareRelationsJob.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json")); + Objects + .requireNonNull( + CreateRelatedEntitiesJob_phase2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/provision/input_params_related_entities_pahase2.json"))); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index b3f785492..ae899c3d8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,14 +131,14 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) - .filter(rel -> 
rel.getDataInfo().getDeletedbyinference() == false) - .filter(rel -> relationFilter.contains(StringUtils.lowerCase(rel.getRelClass())) == false); + .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); JavaRDD pruned = pruneRels( pruneRels( rels, - sourceMaxRelations, relPartitions, (Function) r -> r.getSource()), - targetMaxRelations, relPartitions, (Function) r -> r.getTarget()); + sourceMaxRelations, relPartitions, (Function) Relation::getSource), + targetMaxRelations, relPartitions, (Function) Relation::getTarget); spark .createDataset(pruned.rdd(), Encoders.bean(Relation.class)) .repartition(relPartitions) @@ -170,8 +170,8 @@ public class PrepareRelationsJob { .map( (MapFunction) s -> OBJECT_MAPPER.readValue(s, Relation.class), Encoders.kryo(Relation.class)) - .filter((FilterFunction) rel -> rel.getDataInfo().getDeletedbyinference() == false) - .filter((FilterFunction) rel -> relationFilter.contains(rel.getRelClass()) == false) + .filter((FilterFunction) rel -> !rel.getDataInfo().getDeletedbyinference()) + .filter((FilterFunction) rel -> !relationFilter.contains(rel.getRelClass())) .groupByKey( (MapFunction) Relation::getSource, Encoders.STRING()) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java index 28c1111d6..01d161b6b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/ProvisionConstants.java @@ -3,6 +3,9 @@ package eu.dnetlib.dhp.oa.provision; public class ProvisionConstants { + private ProvisionConstants() { + } + public static final String LAYOUT = "index"; public static final String INTERPRETATION = "openaire"; public static final String SEPARATOR = "-"; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java index 410aff5ba..0033978bf 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplication.java @@ -17,7 +17,6 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.dhp.oa.provision.utils.ZkServers; import eu.dnetlib.dhp.utils.ISLookupClientFactory; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class SolrAdminApplication implements Closeable { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index b44ed7446..b383f67ef 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Maps; import eu.dnetlib.dhp.application.ArgumentApplicationParser; @@ -40,7 
+39,7 @@ public class XmlConverterJob { private static final Logger log = LoggerFactory.getLogger(XmlConverterJob.class); - public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; + public static final String SCHEMA_LOCATION = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"; public static void main(String[] args) throws Exception { @@ -95,7 +94,7 @@ public class XmlConverterJob { prepareAccumulators(spark.sparkContext()), contextMapper, false, - schemaLocation, + SCHEMA_LOCATION, otherDsTypeId); final List paths = HdfsSupport @@ -186,7 +185,7 @@ public class XmlConverterJob { accumulators .put( "organizationOrganization_dedup_merges", - sc.longAccumulator("resultProject_outcome_produces")); + sc.longAccumulator("organizationOrganization_dedup_merges")); accumulators .put( "datasourceOrganization_provision_isProvidedBy", diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java index a321bdba9..e7dbdbd2b 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJob.java @@ -3,14 +3,12 @@ package eu.dnetlib.dhp.oa.provision; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; -import javax.swing.text.html.Option; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.stream.StreamResult; @@ -163,7 +161,7 @@ public class XmlIndexingJob { case HDFS: spark .createDataset( - docs.map(s -> new SerializableSolrInputDocument(s)).rdd(), + docs.map(SerializableSolrInputDocument::new).rdd(), Encoders.kryo(SerializableSolrInputDocument.class)) .write() .mode(SaveMode.Overwrite) @@ -174,14 +172,13 @@ public class XmlIndexingJob { } } - protected static String toIndexRecord(Transformer tr, final String record) { + protected static String toIndexRecord(Transformer tr, final String xmlRecord) { final StreamResult res = new StreamResult(new StringWriter()); try { - tr.transform(new StreamSource(new StringReader(record)), res); + tr.transform(new StreamSource(new StringReader(xmlRecord)), res); return res.getWriter().toString(); - } catch (Throwable e) { - log.error("XPathException on record: \n {}", record, e); - throw new IllegalArgumentException(e); + } catch (TransformerException e) { + throw new IllegalArgumentException("XPathException on record: \n" + xmlRecord, e); } } @@ -192,8 +189,6 @@ public class XmlIndexingJob { * @param xslt xslt for building the index record transformer * @param fields the list of fields * @return the javax.xml.transform.Transformer - * @throws ISLookUpException could happen - * @throws IOException could happen * @throws TransformerException could happen */ protected static String getLayoutTransformer(String format, String fields, String xslt) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index 2eb9cf38b..0fb109fbb 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ 
b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -2,7 +2,6 @@ package eu.dnetlib.dhp.oa.provision.model; import java.io.Serializable; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index c09ed86e5..d4ee24c14 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -11,6 +11,9 @@ import eu.dnetlib.dhp.schema.common.ModelSupport; public class ProvisionModelSupport { + private ProvisionModelSupport() { + } + public static Class[] getModelClasses() { List> modelClasses = Lists.newArrayList(ModelSupport.getOafModelClasses()); modelClasses diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java index e15ceff76..5c78d1826 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/RelatedEntity.java @@ -34,7 +34,6 @@ public class RelatedEntity implements Serializable { private Qualifier datasourcetype; private Qualifier datasourcetypeui; private Qualifier openairecompatibility; - // private String aggregatortype; // organization private String legalname; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java index 7391569ed..a91050403 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/AuthorPidTypeComparator.java @@ -5,7 +5,6 @@ import java.util.Comparator; import java.util.Optional; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index ac418f2b9..bcaf40603 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -9,6 +9,7 @@ import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.xml.sax.SAXException; import com.google.common.base.Joiner; @@ -23,7 +24,7 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; public static ContextMapper fromIS(final String isLookupUrl) - throws 
DocumentException, ISLookUpException { + throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); StringBuilder sb = new StringBuilder(""); Joiner.on("").appendTo(sb, isLookUp.quickSearchProfile(XQUERY)); @@ -31,10 +32,12 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } - public static ContextMapper fromXml(final String xml) throws DocumentException { + public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + final Document doc = reader.read(new StringReader(xml)); for (Object o : doc.selectNodes("//entry")) { Node node = (Node) o; String id = node.valueOf("./@id"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java index d2131ef28..3750d0173 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/GraphMappingUtils.java @@ -8,16 +8,18 @@ import java.util.Set; import com.google.common.collect.Sets; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.*; public class GraphMappingUtils { public static final String SEPARATOR = "_"; - public static Set authorPidTypes = Sets + public static final Set authorPidTypes = Sets .newHashSet( ModelConstants.ORCID, ModelConstants.ORCID_PENDING, "magidentifier"); + private GraphMappingUtils() { + } + public static String removePrefix(final String s) { if (s.contains("|")) return substringAfter(s, "|"); diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java index 29a51cb29..8c7c61361 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ISLookupClient.java @@ -25,11 +25,9 @@ public class ISLookupClient { * * @param format the Metadata format name * @return the string representation of the list of fields to be indexed - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ - public String getLayoutSource(final String format) - throws ISLookUpDocumentNotFoundException, ISLookUpException { + public String getLayoutSource(final String format) throws ISLookUpException { return doLookup( String .format( @@ -41,7 +39,6 @@ public class ISLookupClient { * Method retrieves from the information system the openaireLayoutToRecordStylesheet * * @return the string representation of the XSLT contained in the transformation rule profile - * @throws ISLookUpDocumentNotFoundException * @throws ISLookUpException */ public String getLayoutTransformer() throws ISLookUpException { @@ -78,9 +75,9 @@ public class ISLookupClient { } private String doLookup(String xquery) throws ISLookUpException { - log.info(String.format("running xquery: %s", xquery)); + log.info("running xquery: {}", 
xquery); final String res = getIsLookup().getResourceProfileByQuery(xquery); - log.info(String.format("got response (100 chars): %s", StringUtils.left(res, 100) + " ...")); + log.info("got response (100 chars): {} ...", StringUtils.left(res, 100)); return res; } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java deleted file mode 100644 index 9dbac1936..000000000 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/LicenseComparator.java +++ /dev/null @@ -1,69 +0,0 @@ - -package eu.dnetlib.dhp.oa.provision.utils; - -import java.util.Comparator; - -import eu.dnetlib.dhp.schema.oaf.Qualifier; - -public class LicenseComparator implements Comparator { - - @Override - public int compare(Qualifier left, Qualifier right) { - - if (left == null && right == null) - return 0; - if (left == null) - return 1; - if (right == null) - return -1; - - String lClass = left.getClassid(); - String rClass = right.getClassid(); - - if (lClass.equals(rClass)) - return 0; - - if (lClass.equals("OPEN SOURCE")) - return -1; - if (rClass.equals("OPEN SOURCE")) - return 1; - - if (lClass.equals("OPEN")) - return -1; - if (rClass.equals("OPEN")) - return 1; - - if (lClass.equals("6MONTHS")) - return -1; - if (rClass.equals("6MONTHS")) - return 1; - - if (lClass.equals("12MONTHS")) - return -1; - if (rClass.equals("12MONTHS")) - return 1; - - if (lClass.equals("EMBARGO")) - return -1; - if (rClass.equals("EMBARGO")) - return 1; - - if (lClass.equals("RESTRICTED")) - return -1; - if (rClass.equals("RESTRICTED")) - return 1; - - if (lClass.equals("CLOSED")) - return -1; - if (rClass.equals("CLOSED")) - return 1; - - if (lClass.equals("UNKNOWN")) - return -1; - if (rClass.equals("UNKNOWN")) - return 1; - - // Else (but unlikely), lexicographical ordering will do. 
- return lClass.compareTo(rClass); - } -} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java index f16ee260f..36028be9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/StreamingInputDocumentFactory.java @@ -3,10 +3,10 @@ package eu.dnetlib.dhp.oa.provision.utils; import java.io.StringReader; import java.io.StringWriter; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Objects; import javax.xml.stream.*; import javax.xml.stream.events.Namespace; @@ -57,13 +57,13 @@ public class StreamingInputDocumentFactory { private static final int MAX_FIELD_LENGTH = 25000; private final ThreadLocal inputFactory = ThreadLocal - .withInitial(() -> XMLInputFactory.newInstance()); + .withInitial(XMLInputFactory::newInstance); private final ThreadLocal outputFactory = ThreadLocal - .withInitial(() -> XMLOutputFactory.newInstance()); + .withInitial(XMLOutputFactory::newInstance); private final ThreadLocal eventFactory = ThreadLocal - .withInitial(() -> XMLEventFactory.newInstance()); + .withInitial(XMLEventFactory::newInstance); private final String version; @@ -126,6 +126,8 @@ public class StreamingInputDocumentFactory { return indexDocument; } catch (XMLStreamException e) { throw new IllegalStateException(e); + } finally { + inputFactory.remove(); } } @@ -143,9 +145,9 @@ public class StreamingInputDocumentFactory { /** * Parse the targetFields block and add fields to the solr document. * - * @param indexDocument - * @param parser - * @throws XMLStreamException + * @param indexDocument the document being populated + * @param parser the XML parser + * @throws XMLStreamException when the parser cannot parse the XML */ protected void parseTargetFields( final SolrInputDocument indexDocument, final XMLEventReader parser) @@ -165,9 +167,10 @@ public class StreamingInputDocumentFactory { final XMLEvent text = parser.nextEvent(); String data = getText(text); - - addField(indexDocument, fieldName, data); - hasFields = true; + if (Objects.nonNull(data)) { + addField(indexDocument, fieldName, data); + hasFields = true; + } } } @@ -192,36 +195,43 @@ public class StreamingInputDocumentFactory { final List nsList, final String dnetResult) throws XMLStreamException { + final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); + final XMLEventFactory xmlEventFactory = this.eventFactory.get(); + try { - for (Namespace ns : nsList) { - eventFactory.get().createNamespace(ns.getPrefix(), ns.getNamespaceURI()); - } - - StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, nsList.iterator()); - - // new root record - writer.add(newRecord); - - // copy the rest as it is - while (parser.hasNext()) { - final XMLEvent resultEvent = parser.nextEvent(); - - // TODO: replace with depth tracking instead of close tag tracking. 
- if (resultEvent.isEndElement() - && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { - writer.add(eventFactory.get().createEndElement("", null, RESULT)); - break; + for (Namespace ns : nsList) { + xmlEventFactory.createNamespace(ns.getPrefix(), ns.getNamespaceURI()); } - writer.add(resultEvent); + StartElement newRecord = xmlEventFactory.createStartElement("", null, RESULT, null, nsList.iterator()); + + // new root record + writer.add(newRecord); + + // copy the rest as it is + while (parser.hasNext()) { + final XMLEvent resultEvent = parser.nextEvent(); + + // TODO: replace with depth tracking instead of close tag tracking. + if (resultEvent.isEndElement() + && resultEvent.asEndElement().getName().getLocalPart().equals(dnetResult)) { + writer.add(xmlEventFactory.createEndElement("", null, RESULT)); + break; + } + + writer.add(resultEvent); + } + writer.close(); + indexDocument.addField(INDEX_RESULT, results.toString()); + } finally { + outputFactory.remove(); + eventFactory.remove(); } - writer.close(); - indexDocument.addField(INDEX_RESULT, results.toString()); } /** - * Helper used to add a field to a solr doc. It avoids to add empy fields + * Helper used to add a field to a solr doc, avoids adding empty fields * * @param indexDocument * @param field @@ -231,7 +241,6 @@ public class StreamingInputDocumentFactory { final SolrInputDocument indexDocument, final String field, final String value) { String cleaned = value.trim(); if (!cleaned.isEmpty()) { - // log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); indexDocument.addField(field.toLowerCase(), cleaned); } } @@ -243,9 +252,9 @@ public class StreamingInputDocumentFactory { * @return the */ protected final String getText(final XMLEvent text) { - if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + - // text.asEndElement().getName().getLocalPart()); + if (text.isEndElement()) { return ""; + } final String data = text.asCharacters().getData(); if (data != null && data.length() > MAX_FIELD_LENGTH) { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java index 173ba326a..7487f0956 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/TemplateFactory.java @@ -50,7 +50,7 @@ public class TemplateFactory { public String getChild(final String name, final String id, final List metadata) { return getTemplate(resources.getChild()) .add("name", name) - .add("hasId", !(id == null)) + .add("hasId", id != null) .add("id", id != null ? escapeXml(removePrefix(id)) : "") .add("metadata", metadata) .render(); @@ -103,7 +103,7 @@ public class TemplateFactory { (webresources != null ? 
webresources : new ArrayList()) .stream() .filter(StringUtils::isNotBlank) - .map(w -> getWebResource(w)) + .map(this::getWebResource) .collect(Collectors.toList())) .render(); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 2c8240290..631335376 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -25,8 +25,8 @@ import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; +import org.xml.sax.SAXException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; @@ -46,6 +46,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; public class XmlRecordFactory implements Serializable { + public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; private final Map accumulators; private final Set specialDatasourceTypes; @@ -56,8 +57,6 @@ public class XmlRecordFactory implements Serializable { private boolean indent = false; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - public XmlRecordFactory( final ContextMapper contextMapper, final boolean indent, @@ -86,7 +85,6 @@ public class XmlRecordFactory implements Serializable { final Set contexts = Sets.newHashSet(); - // final OafEntity entity = toOafEntity(je.getEntity()); OafEntity entity = je.getEntity(); TemplateFactory templateFactory = new TemplateFactory(); try { @@ -95,8 +93,7 @@ public class XmlRecordFactory implements Serializable { final List metadata = metadata(type, entity, contexts); // rels has to be processed before the contexts because they enrich the contextMap with - // the - // funding info. + // the funding info. final List links = je.getLinks(); final List relations = links .stream() @@ -122,34 +119,11 @@ public class XmlRecordFactory implements Serializable { } } - private static OafEntity parseOaf(final String json, final String type) { - try { - switch (EntityType.valueOf(type)) { - case publication: - return OBJECT_MAPPER.readValue(json, Publication.class); - case dataset: - return OBJECT_MAPPER.readValue(json, Dataset.class); - case otherresearchproduct: - return OBJECT_MAPPER.readValue(json, OtherResearchProduct.class); - case software: - return OBJECT_MAPPER.readValue(json, Software.class); - case datasource: - return OBJECT_MAPPER.readValue(json, Datasource.class); - case organization: - return OBJECT_MAPPER.readValue(json, Organization.class); - case project: - return OBJECT_MAPPER.readValue(json, Project.class); - default: - throw new IllegalArgumentException("invalid type: " + type); - } - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } - private String printXML(String xml, boolean indent) { try { - final Document doc = new SAXReader().read(new StringReader(xml)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document doc = reader.read(new StringReader(xml)); OutputFormat format = indent ? 
OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat(); format.setExpandEmptyElements(false); format.setSuppressDeclaration(true); @@ -157,7 +131,7 @@ public class XmlRecordFactory implements Serializable { XMLWriter writer = new XMLWriter(sw, format); writer.write(doc); return sw.toString(); - } catch (IOException | DocumentException e) { + } catch (IOException | DocumentException | SAXException e) { throw new IllegalArgumentException("Unable to indent XML. Invalid record:\n" + xml, e); } } @@ -203,7 +177,7 @@ public class XmlRecordFactory implements Serializable { final Result r = (Result) entity; if (r.getContext() != null) { - contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); + contexts.addAll(r.getContext().stream().map(Context::getId).collect(Collectors.toList())); /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ if (contexts.contains("dh-ch::subcommunity::2")) { contexts.add("clarin"); @@ -260,14 +234,14 @@ public class XmlRecordFactory implements Serializable { .collect( Collectors .groupingBy( - p -> p.getValue(), + StructuredProperty::getValue, Collectors .mapping( p -> p, Collectors.minBy(new AuthorPidTypeComparator())))) .values() .stream() - .map(op -> op.get()) + .map(Optional::get) .forEach( sp -> { String pidType = getAuthorPidType(sp.getQualifier().getClassid()); @@ -938,7 +912,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .filter(Objects::nonNull) - .map(ft -> ft.getValue()) + .map(Field::getValue) .collect(Collectors.toList())); } @@ -1069,7 +1043,7 @@ public class XmlRecordFactory implements Serializable { .getFundingtree() .stream() .peek(ft -> fillContextMap(ft, contexts)) - .map(ft -> getRelFundingTree(ft)) + .map(XmlRecordFactory::getRelFundingTree) .collect(Collectors.toList())); } break; @@ -1112,7 +1086,7 @@ public class XmlRecordFactory implements Serializable { final List links = je.getLinks(); List children = links .stream() - .filter(link -> isDuplicate(link)) + .filter(this::isDuplicate) .map(link -> { final String targetType = link.getTarget().getType(); final String name = ModelSupport.getMainType(EntityType.valueOf(targetType)); @@ -1263,7 +1237,7 @@ public class XmlRecordFactory implements Serializable { return extraInfo != null ? 
extraInfo .stream() - .map(e -> XmlSerializationUtils.mapExtraInfo(e)) + .map(XmlSerializationUtils::mapExtraInfo) .collect(Collectors.toList()) : Lists.newArrayList(); } @@ -1287,9 +1261,6 @@ public class XmlRecordFactory implements Serializable { if (def == null) { continue; - // throw new IllegalStateException(String.format("cannot find context for id - // '%s'", - // id)); } if (def.getName().equals("context")) { @@ -1327,7 +1298,9 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { - Transformer transformer = TransformerFactory.newInstance().newTransformer(); + final TransformerFactory factory = TransformerFactory.newInstance(); + factory.setFeature(DISALLOW_DOCTYPE_DECL, true); + Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; } catch (TransformerConfigurationException e) { @@ -1354,8 +1327,10 @@ public class XmlRecordFactory implements Serializable { Document fundingPath; try { - fundingPath = new SAXReader().read(new StringReader(xmlTree)); - } catch (final DocumentException e) { + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + fundingPath = reader.read(new StringReader(xmlTree)); + } catch (final DocumentException | SAXException e) { throw new RuntimeException(e); } try { @@ -1407,7 +1382,9 @@ public class XmlRecordFactory implements Serializable { protected static String getRelFundingTree(final String xmlTree) { String funding = ""; try { - final Document ftree = new SAXReader().read(new StringReader(xmlTree)); + final SAXReader reader = new SAXReader(); + reader.setFeature(DISALLOW_DOCTYPE_DECL, true); + final Document ftree = reader.read(new StringReader(xmlTree)); funding = ""; funding += getFunderElement(ftree); @@ -1427,7 +1404,7 @@ public class XmlRecordFactory implements Serializable { + e.getName() + ">"; } - } catch (final DocumentException e) { + } catch (final DocumentException | SAXException e) { throw new IllegalArgumentException( "unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); } finally { diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java index 8195467b1..213a62b32 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlSerializationUtils.java @@ -11,9 +11,12 @@ public class XmlSerializationUtils { // XML 1.0 // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - private static final String xml10pattern = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + private static final String XML_10_PATTERN = "[^" + "\u0009\r\n" + "\u0020-\uD7FF" + "\uE000-\uFFFD" + "\ud800\udc00-\udbff\udfff" + "]"; + private XmlSerializationUtils() { + } + public static String mapJournal(Journal j) { final String attrs = new StringBuilder() .append(attr("issn", j.getIssnPrinted())) @@ -50,12 +53,12 @@ public class XmlSerializationUtils { public static String escapeXml(final String value) { return value - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">") - .replaceAll("\"", """) - .replaceAll("'", "'") - .replaceAll(xml10pattern, ""); + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + 
.replace("\"", """) + .replace("'", "'") + .replaceAll(XML_10_PATTERN, ""); } public static String parseDataInfo(final DataInfo dataInfo) { @@ -70,18 +73,6 @@ public class XmlSerializationUtils { .toString(); } - private static StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo info) { - return sb - .append( - attr("inferred", info.getInferred() != null ? info.getInferred().toString() : "")) - .append(attr("inferenceprovenance", info.getInferenceprovenance())) - .append( - attr( - "provenanceaction", - info.getProvenanceaction() != null ? info.getProvenanceaction().getClassid() : "")) - .append(attr("trust", info.getTrust())); - } - public static String mapKeyValue(final String name, final KeyValue kv) { return new StringBuilder() .append("<") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java index 6cec3ed53..903150ca8 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ZkServers.java @@ -25,7 +25,7 @@ public class ZkServers { // quorum0:2182,quorum1:2182,quorum2:2182,quorum3:2182,quorum4:2182/solr-dev-openaire String urls = zkUrl; final Optional chRoot = Optional.of(SEPARATOR + StringUtils.substringAfterLast(zkUrl, SEPARATOR)); - if (chRoot.isPresent() && StringUtils.isNotBlank(chRoot.get())) { + if (StringUtils.isNotBlank(chRoot.get())) { log.debug(String.format("found zk chroot %s", chRoot)); urls = zkUrl.replace(chRoot.get(), ""); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java index ffeb0995d..e5faccd0f 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/DropAndCreateESIndex.java @@ -45,6 +45,7 @@ public class DropAndCreateESIndex { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final String ip = clusterMap.get(cluster).split(",")[0]; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java index 3f842ef34..dd08215d5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/sx/provision/SparkIndexCollectionOnES.java @@ -44,6 +44,7 @@ public class SparkIndexCollectionOnES { .requireNonNull( DropAndCreateESIndex.class.getResourceAsStream("/eu/dnetlib/dhp/sx/provision/cluster.json"))); + @SuppressWarnings("unchecked") final Map clusterMap = new ObjectMapper().readValue(clusterJson, Map.class); final SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java 
b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e1ed4ffe4..9f022454b 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -4,7 +4,7 @@ package eu.dnetlib.dhp.oa.provision; import static org.junit.jupiter.api.Assertions.assertNotNull; import java.io.IOException; -import java.util.List; +import java.util.Objects; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -16,11 +16,9 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; -import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory; @@ -35,7 +33,7 @@ import eu.dnetlib.dhp.utils.saxon.SaxonTransformerFactory; * * The input is a JoinedEntity, i.e. a json representation of an OpenAIRE entity that embeds all the linked entities. */ -public class IndexRecordTransformerTest { +class IndexRecordTransformerTest { public static final String VERSION = "2021-04-15T10:05:53Z"; public static final String DSID = "b9ee796a-c49f-4473-a708-e7d67b84c16d_SW5kZXhEU1Jlc291cmNlcy9JbmRleERTUmVzb3VyY2VUeXBl"; @@ -48,23 +46,23 @@ public class IndexRecordTransformerTest { } @Test - public void testPreBuiltRecordTransformation() throws IOException, TransformerException { - String record = IOUtils.toString(getClass().getResourceAsStream("record.xml")); + void testPreBuiltRecordTransformation() throws IOException, TransformerException { + String record = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("record.xml"))); testRecordTransformation(record); } @Test - public void testPublicationRecordTransformation() throws IOException, TransformerException { + void testPublicationRecordTransformation() throws IOException, TransformerException { - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, XmlRecordFactoryTest.otherDsTypeId); Publication p = load("publication.json", Publication.class); Project pj = load("project.json", Project.class); Relation rel = load("relToValidatedProject.json", Relation.class); - JoinedEntity je = new JoinedEntity<>(p); + JoinedEntity je = new JoinedEntity<>(p); je .setLinks( Lists @@ -80,8 +78,9 @@ public class IndexRecordTransformerTest { } private void testRecordTransformation(String record) throws IOException, TransformerException { - String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); - String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); + String fields = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("fields.xml"))); + String xslt = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"))); String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt); @@ -99,7 +98,7 @@ public class 
IndexRecordTransformerTest { private T load(String fileName, Class clazz) throws IOException { return XmlRecordFactoryTest.OBJECT_MAPPER - .readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz); + .readValue(IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream(fileName))), clazz); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java index 6818cf6a5..c22a24185 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJobTest.java @@ -1,6 +1,9 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -62,7 +65,7 @@ public class PrepareRelationsJobTest { } @Test - public void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { + void testRunPrepareRelationsJob(@TempDir Path testPath) throws Exception { final int maxRelations = 20; PrepareRelationsJob @@ -83,7 +86,7 @@ public class PrepareRelationsJobTest { .as(Encoders.bean(Relation.class)) .cache(); - Assertions.assertEquals(maxRelations, out.count()); + assertEquals(maxRelations, out.count()); Dataset freq = out .toDF() @@ -97,13 +100,13 @@ public class PrepareRelationsJobTest { long participation = getRows(freq, PARTICIPATION).get(0).getAs("count"); long affiliation = getRows(freq, AFFILIATION).get(0).getAs("count"); - Assertions.assertTrue(participation == outcome); - Assertions.assertTrue(outcome > affiliation); - Assertions.assertTrue(participation > affiliation); + assertEquals(outcome, participation); + assertTrue(outcome > affiliation); + assertTrue(participation > affiliation); - Assertions.assertEquals(7, outcome); - Assertions.assertEquals(7, participation); - Assertions.assertEquals(6, affiliation); + assertEquals(7, outcome); + assertEquals(7, participation); + assertEquals(6, affiliation); } protected List getRows(Dataset freq, String col) { diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java index f57b8dcaf..9d5bff3cf 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SolrAdminApplicationTest.java @@ -1,39 +1,42 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.client.solrj.response.UpdateResponse; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -public class SolrAdminApplicationTest extends SolrTest { +class SolrAdminApplicationTest extends SolrTest { @Test - public void testPing() throws Exception { + void testPing() throws Exception { SolrPingResponse pingResponse = miniCluster.getSolrClient().ping(); log.info("pingResponse: '{}'", pingResponse.getStatus()); - Assertions.assertTrue(pingResponse.getStatus() 
== 0); + assertEquals(0, pingResponse.getStatus()); } @Test - public void testAdminApplication_DELETE() throws Exception { + void testAdminApplication_DELETE() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin .execute(SolrAdminApplication.Action.DELETE_BY_QUERY, DEFAULT_COLLECTION, "*:*", false); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } @Test - public void testAdminApplication_COMMIT() throws Exception { + void testAdminApplication_COMMIT() throws Exception { SolrAdminApplication admin = new SolrAdminApplication(miniCluster.getSolrClient().getZkHost()); UpdateResponse rsp = (UpdateResponse) admin.commit(DEFAULT_COLLECTION); - Assertions.assertTrue(rsp.getStatus() == 0); + assertEquals(0, rsp.getStatus()); } } diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java index 72f28fdf2..dc0a40471 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/SortableRelationKeyTest.java @@ -28,7 +28,6 @@ public class SortableRelationKeyTest { .map(r -> SortableRelationKey.create(r, r.getSource())) .sorted() .forEach( - it -> { try { System.out.println(mapper.writeValueAsString(it)); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index d7bcb3185..6f1956578 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -89,7 +89,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_onSolr() throws Exception { + void testXmlIndexingJob_onSolr() throws Exception { String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; @@ -112,7 +112,7 @@ public class XmlIndexingJobTest extends SolrTest { } @Test - public void testXmlIndexingJob_saveOnHDFS() throws Exception { + void testXmlIndexingJob_saveOnHDFS() throws Exception { final String ID_XPATH = "//header/*[local-name()='objIdentifier']"; String inputPath = "src/test/resources/eu/dnetlib/dhp/oa/provision/xml"; diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index 221049f90..d89b3b068 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -13,6 +13,7 @@ import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; @@ -21,7 +22,6 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import 
eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; -import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.schema.oaf.Dataset; @@ -29,7 +29,7 @@ import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Relation; -public class XmlRecordFactoryTest { +class XmlRecordFactoryTest { public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource"; @@ -37,11 +37,11 @@ public class XmlRecordFactoryTest { .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Test - public void testXMLRecordFactory() throws IOException, DocumentException { + void testXMLRecordFactory() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -72,11 +72,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -104,11 +104,11 @@ public class XmlRecordFactoryTest { } @Test - public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { + void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException { ContextMapper contextMapper = new ContextMapper(); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Publication p = OBJECT_MAPPER @@ -135,7 +135,7 @@ public class XmlRecordFactoryTest { } @Test - public void testEnermapsRecord() throws IOException, DocumentException { + void testEnermapsRecord() throws IOException, DocumentException, SAXException { String contextmap = "" + @@ -144,7 +144,7 @@ public class XmlRecordFactoryTest { ""; ContextMapper contextMapper = ContextMapper.fromXml(contextmap); - XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation, + XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.SCHEMA_LOCATION, otherDsTypeId); Dataset d = OBJECT_MAPPER diff --git a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java index 5b2e6804b..93f1bc087 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java +++ 
b/dhp-workflows/dhp-usage-raw-data-update/src/main/java/eu/dnetlib/oa/graph/usagerawdata/export/ConnectDB.java @@ -7,16 +7,8 @@ package eu.dnetlib.oa.graph.usagerawdata.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; -import java.util.Properties; -import org.apache.log4j.Logger; - -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. Zoupanos */ @@ -31,7 +23,9 @@ public abstract class ConnectDB { private static String dbImpalaUrl; private static String usageStatsDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -72,10 +66,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -97,10 +87,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); diff --git a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java index e53709f1a..afd7f9807 100644 --- a/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java +++ b/dhp-workflows/dhp-usage-stats-build/src/main/java/eu/dnetlib/oa/graph/usagestatsbuild/export/ConnectDB.java @@ -7,20 +7,14 @@ package eu.dnetlib.oa.graph.usagestatsbuild.export; import java.sql.Connection; -import java.sql.DriverManager; import java.sql.SQLException; -import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; -import java.util.Properties; import org.apache.log4j.Logger; -/** - * @author D. Pierrakos, S. Zoupanos - */ /** * @author D. Pierrakos, S. 
Zoupanos */ @@ -37,7 +31,9 @@ public abstract class ConnectDB { private static String usageStatsDBSchema; private static String usagestatsPermanentDBSchema; private static String statsDBSchema; - private final static Logger log = Logger.getLogger(ConnectDB.class); + + private ConnectDB() { + } static void init() throws ClassNotFoundException { @@ -94,10 +90,6 @@ public abstract class ConnectDB { } private static Connection connectHive() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbHiveUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbHiveUrl); cpds.setAcquireIncrement(1); @@ -119,10 +111,6 @@ public abstract class ConnectDB { } private static Connection connectImpala() throws SQLException { - /* - * Connection connection = DriverManager.getConnection(dbImpalaUrl); Statement stmt = - * connection.createStatement(); log.debug("Opened database successfully"); return connection; - */ ComboPooledDataSource cpds = new ComboPooledDataSource(); cpds.setJdbcUrl(dbImpalaUrl); cpds.setAcquireIncrement(1); From 61d811ba531ab79ce314f799fa8471007f7dd03c Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 12:18:20 +0200 Subject: [PATCH 048/166] suggestions from intellij --- .../eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 2 +- .../dhp/oa/graph/raw/PatchRelationApplicationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index 6c309acb3..cc36421a0 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.collection.HttpConnector2; public class EXCELParserTest { private static Path workingDir; - private HttpConnector2 httpConnector = new HttpConnector2(); + private final HttpConnector2 httpConnector = new HttpConnector2(); private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java index c8158c044..c9c32edd9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/PatchRelationApplicationTest.java @@ -81,8 +81,8 @@ public class PatchRelationApplicationTest { PatchRelationsApplication.main(new String[] { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-graphBasePath", graphBasePath, - "-workingDir", workingDir.toString() + "/workingDir", - "-idMappingPath", workingDir.toString() + "/" + ID_MAPPING_PATH + "-workingDir", workingDir + "/workingDir", + "-idMappingPath", workingDir + "/" + ID_MAPPING_PATH }); final List rels = spark From 9f4db73f30981aafb57440a3332c4095ef30f8d9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 11 Aug 2021 15:02:51 +0200 Subject: [PATCH 049/166] updated/fixed unit tests --- 
.../collection/plugin/rest/RestIterator.java | 1 - .../oa/dedup/DispatchEntitiesSparkJob.java | 1 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 6 ++-- .../dnetlib/dhp/oa/dedup/SparkStatsTest.java | 29 +++++++++++-------- .../doiboost/orcid/OrcidClientTest.java | 1 + .../orcidnodoi/PublicationToOafTest.java | 4 +-- .../graph/dump/community/CommunitySplit.java | 28 ++++++++++-------- .../oa/graph/dump/SplitForCommunityTest.java | 2 +- .../CreateRelatedEntitiesJob_phase1.java | 7 +++-- .../oa/provision/utils/XmlRecordFactory.java | 1 - 10 files changed, 45 insertions(+), 35 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java index 67ec22b46..a90d259b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java @@ -132,7 +132,6 @@ public class RestIterator implements Iterator { private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) throws TransformerConfigurationException, XPathExpressionException { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java index 2cdc22153..ea738836b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DispatchEntitiesSparkJob.java @@ -57,6 +57,7 @@ public class DispatchEntitiesSparkJob { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 1860a3ff0..2f992bd78 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -195,7 +195,7 @@ public class SparkDedupTest implements Serializable { assertEquals(3082, orgs_simrel); assertEquals(7036, pubs_simrel); - assertEquals(344, sw_simrel); + assertEquals(336, sw_simrel); assertEquals(442, ds_simrel); assertEquals(6750, orp_simrel); } @@ -346,7 +346,7 @@ public class SparkDedupTest implements Serializable { assertEquals(1272, orgs_mergerel); assertEquals(1438, pubs_mergerel); - assertEquals(288, sw_mergerel); + assertEquals(286, sw_mergerel); assertEquals(472, ds_mergerel); assertEquals(718, orp_mergerel); @@ -535,7 +535,7 @@ public class SparkDedupTest implements Serializable { long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - assertEquals(4862, 
relations); + assertEquals(4860, relations); // check deletedbyinference final Dataset mergeRels = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java index 2efbe260a..1ba2c717c 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkStatsTest.java @@ -41,7 +41,7 @@ public class SparkStatsTest implements Serializable { private static final String testActionSetId = "test-orchestrator"; @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { + public static void beforeAll() throws IOException, URISyntaxException { testGraphBasePath = Paths .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) @@ -73,7 +73,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); @@ -82,7 +82,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); @@ -91,7 +91,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); @@ -100,7 +100,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); @@ -109,7 +109,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); @@ -118,7 +118,7 @@ public class SparkStatsTest implements Serializable { .thenReturn( IOUtils .toString( - SparkDedupTest.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); } @@ -129,7 +129,7 @@ public class SparkStatsTest implements Serializable { ArgumentApplicationParser parser = new ArgumentApplicationParser( IOUtils .toString( - SparkCreateSimRels.class + SparkStatsTest.class .getResourceAsStream( "/eu/dnetlib/dhp/oa/dedup/createBlockStats_parameters.json"))); parser @@ -168,10 +168,15 @@ public class SparkStatsTest implements Serializable { .textFile(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_blockstats") .count(); - assertEquals(549, orgs_blocks); - assertEquals(299, pubs_blocks); + assertEquals(477, orgs_blocks); + assertEquals(295, pubs_blocks); assertEquals(122, sw_blocks); - assertEquals(186, ds_blocks); - assertEquals(170, orp_blocks); + assertEquals(191, ds_blocks); + assertEquals(171, orp_blocks); + } + + @AfterAll + public static void tearDown() { + spark.close(); } } diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 82577e0d8..9ea7c6959 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ 
-160,6 +160,7 @@ public class OrcidClientTest { } @Test + @Disabled void testReadBase64CompressedRecord() throws Exception { final String base64CompressedRecord = IOUtils .toString(getClass().getResourceAsStream("0000-0003-3028-6161.compressed.base64")); diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java index 1dbb37fca..54c16b5d7 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/PublicationToOafTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.doiboost.orcidnodoi; import static org.junit.jupiter.api.Assertions.*; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,14 +15,13 @@ import com.google.gson.JsonParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.doiboost.orcidnodoi.oaf.PublicationToOaf; -import jdk.nashorn.internal.ir.annotations.Ignore; class PublicationToOafTest { private static final Logger logger = LoggerFactory.getLogger(PublicationToOafTest.class); @Test - @Ignore + @Disabled void convertOafPublicationTest() throws Exception { String jsonPublication = IOUtils .toString( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java index a9c0770a8..b92eb3e60 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/dump/community/CommunitySplit.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.oa.graph.dump.community; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -48,29 +49,32 @@ public class CommunitySplit implements Serializable { .union(Utils.readPath(spark, inputPath + "/software", CommunityResult.class)); communities - .stream() .forEach(c -> printResult(c, result, outputPath)); } - private static void printResult(String c, Dataset result, String outputPath) { - Dataset community_products = result - .filter((FilterFunction) r -> containsCommunity(r, c)); + private static void printResult(String community, Dataset result, String outputPath) { + Dataset communityProducts = result + .filter((FilterFunction) r -> containsCommunity(r, community)); - community_products.first(); - community_products - .write() - .option("compression", "gzip") - .mode(SaveMode.Overwrite) - .json(outputPath + "/" + c); + try { + communityProducts.first(); + communityProducts + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(outputPath + "/" + community); + } catch (NoSuchElementException e) { + // ignoring it on purpose + } } - private static boolean containsCommunity(CommunityResult r, String c) { + private static boolean containsCommunity(CommunityResult r, String community) { if (Optional.ofNullable(r.getContext()).isPresent()) { return !r .getContext() .stream() - .filter(con -> con.getCode().equals(c)) + 
.filter(con -> con.getCode().equals(community)) .collect(Collectors.toList()) .isEmpty(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java index 2ea4bc91a..e6f0e9106 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/SplitForCommunityTest.java @@ -62,7 +62,7 @@ public class SplitForCommunityTest { } @Test - void test1() { + void testCommunitySplit() { final String sourcePath = getClass() .getResource("/eu/dnetlib/dhp/oa/graph/dump/splitForCommunity") diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index 3301577ee..ed33ff6b6 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -72,6 +72,7 @@ public class CreateRelatedEntitiesJob_phase1 { String graphTableClassName = parser.get("graphTableClassName"); log.info("graphTableClassName: {}", graphTableClassName); + @SuppressWarnings("unchecked") Class entityClazz = (Class) Class.forName(graphTableClassName); SparkConf conf = new SparkConf(); @@ -223,10 +224,10 @@ public class CreateRelatedEntitiesJob_phase1 { /** * Reads a Dataset of eu.dnetlib.dhp.oa.provision.model.SortableRelation objects from a newline delimited json text - * file, + * file * - * @param spark - * @param relationPath + * @param spark the SparkSession + * @param relationPath the path storing the relation objects * @return the Dataset containing all the relationships */ private static Dataset readPathRelation( diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 631335376..adc8aca9e 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -1299,7 +1299,6 @@ public class XmlRecordFactory implements Serializable { private Transformer getTransformer() { try { final TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature(DISALLOW_DOCTYPE_DECL, true); Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); return transformer; From 52c18c2697b17e17916284b7b2f028ad28df7bcf Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:16:55 +0200 Subject: [PATCH 050/166] removed not needed test class. 
Teh functionality has been moved --- .../eu/dnetlib/dhp/doiboost/GetCSVTest.java | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java deleted file mode 100644 index 6cfc90736..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/dhp/doiboost/GetCSVTest.java +++ /dev/null @@ -1,43 +0,0 @@ - -package eu.dnetlib.dhp.doiboost; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -public class GetCSVTest { - - @Test - public void readUnibiGoldTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.doiboost.UnibiGoldModel", ','); - - Assertions.assertEquals(72, pl.size()); - -// ObjectMapper OBJECT_MAPPER = new ObjectMapper(); -// -// pl.forEach(res -> { -// try { -// System.out.println(OBJECT_MAPPER.writeValueAsString(res)); -// } catch (JsonProcessingException e) { -// e.printStackTrace(); -// } -// }); - - } -} From b1c6140ebf01608af7792d4d528c83ea62760616 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:23:33 +0200 Subject: [PATCH 051/166] removed all comments in Italian --- .../hostedbymap/SparkApplyHostedByMapToDatasource.scala | 2 -- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 5 ----- .../hostedbymap/SparkPrepareHostedByInfoToApply.scala | 9 +++++---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index fad313f1c..ae1454b47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -61,8 +61,6 @@ object SparkApplyHostedByMapToDatasource { val pinfo : Dataset[EntityInfo] = Aggregators.datasourceToSingleId( spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) - //c. 
dataset join risultato del passo prima di a per datasource id, gruppo per ds id e cambio compatibilita' se necessario - applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 4c3b98f3b..533e439b7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -14,9 +14,6 @@ import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ -//a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance -// nel result => salto)con l'hosted by anche access right della instance se openaccess e' true - object SparkApplyHostedByMapToResult { @@ -76,8 +73,6 @@ object SparkApplyHostedByMapToResult { val pinfo : Dataset[EntityInfo] = spark.read.textFile(preparedInfoPath) .map(ei => mapper.readValue(ei, classOf[EntityInfo])) - //a. publication join risultato del passo precedente su result id (left) setto la istanza (se piu' di una instance - // nel result => salto)con l'hosted by anche access right della instance se openaccess e' true applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index d342d57b0..6db0ad33d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -106,14 +106,15 @@ object SparkPrepareHostedByInfoToApply { import spark.implicits._ - //STEP1: leggere la hostedbymap e trasformarla in entity info + //STEP1: read the hostedbymap and transform it in EntityInfo val hostedByInfo:Dataset[EntityInfo] = spark.createDataset(spark.sparkContext.textFile(hostedByMapPath)).map(toEntityInfo) - //STEP2: creare la mappa publication id issn, eissn, lissn esplosa + //STEP2: create association (publication, issn), (publication, eissn), (publication, lissn) val resultInfoDataset:Dataset[EntityInfo] = prepareResultInfo(spark, graphPath + "/publication") - //STEP3: join resultInfo con hostedByInfo sul journal_id dal result con left - // e riduzione di tutti i result con lo stesso id in una unica entry con aggiunto l'id della datasource + //STEP3: left join resultInfo with hostedByInfo on journal_id. 
Reduction of all the results with the same id in just + //one entry (one result could be associated to issn and eissn and so possivly matching more than once against the map) + //to this entry we add the id of the datasource for the next step joinResHBM(resultInfoDataset, hostedByInfo) .write.mode(SaveMode.Overwrite).option("compression", "gzip").json(outputPath) From 822963283954830d16429411e983529de815b179 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 16:36:01 +0200 Subject: [PATCH 052/166] adding assertions to the mapping of the unibi part of gold list --- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index f886b275b..98b811bb6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -7,6 +7,8 @@ import java.net.URLConnection; import java.nio.charset.Charset; import java.util.List; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import com.fasterxml.jackson.core.JsonProcessingException; @@ -29,18 +31,13 @@ public class TestReadCSV { .build() .parse(); - ObjectMapper mapper = new ObjectMapper(); - - beans.forEach(r -> { - try { - System.out.println(mapper.writeValueAsString(r)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - }); + Assertions.assertEquals(36, beans.size()); + Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); + Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } - + @Disabled @Test public void testCSVUrlUnibi() throws IOException { @@ -66,6 +63,7 @@ public class TestReadCSV { ); } + @Disabled @Test public void testCSVUrlDOAJ() throws IOException { From f9b6b45d85e3ea9e676062366a2e249de1f7bd03 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:04:48 +0200 Subject: [PATCH 053/166] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-common/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 
55fc500d8d0d6736ca10e06c01e83c784299e93f Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:17:48 +0200 Subject: [PATCH 054/166] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml index b421f1342..06325810b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input_itgv4.xml @@ -31,7 +31,7 @@ https://pub.uni-bielefeld.de/record/1997560.json - 0016-9056 + 0016-9056 ger Friedrich From 7aa3260729a7a145842958526d3feedfa3461e8c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:18:45 +0200 Subject: [PATCH 055/166] reverting --- .../original/dc_cleaning_OPENAIREplus_compliant_hal_orig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig index 505f56294..d4eb71dc4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/dc_cleaning_OPENAIREplus_compliant_hal_orig @@ -134,9 +134,9 @@ oaf:identifier = set(xpath:"//dri:recordIdentifier", @identifierType = "oai-orig oaf:datasourceprefix = xpath:"//oaf:datasourceprefix"; // journal data -// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the issn (as e.g. print/online/...) -$varISSN = xpath:"//dc:source[starts-with(., 'issn:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'issn:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'issn:')), 1, 4))))"; -//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'issn:'))"; +// avoiding regular expressions, while a) correcting ISSNs with no - or other letters instead of - and b) ignoring any stuff after the ISSN (as e.g. print/online/...) +$varISSN = xpath:"//dc:source[starts-with(., 'ISSN:') and string-length(.) > 12]/concat(substring(normalize-space(substring-after(., 'ISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'ISSN:')), 1, 4))))"; +//$varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) > 13]/normalize-space(substring-after(., 'ISSN:'))"; $varEISSN = xpath:"//dc:source[starts-with(., 'EISSN:') and string-length(.) 
> 13]/concat(substring(normalize-space(substring-after(., 'EISSN:')), 1, 4), '-', normalize-space(substring-after(., substring(normalize-space(substring-after(., 'EISSN:')), 1, 4))))"; oaf:journal = set(xpath:"//oaf:datasourceprefix[$varISSN or $varEISSN]/''", @issn = xpath:"$varISSN";, @eissn = xpath:"$varEISSN";); From 524c06e0283244758df98add2d622db44dd7f16b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:20:30 +0200 Subject: [PATCH 056/166] reverting --- ...e_datacite_ExchangeLandingpagePid_orig.xsl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl index f80c91a6a..3cfaec80b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/scripts/original/xslt_cleaning_oaiOpenaire_datacite_ExchangeLandingpagePid_orig.xsl @@ -169,7 +169,7 @@ @@ -283,7 +283,7 @@ - + - + - + @@ -793,9 +793,9 @@ - + - + @@ -844,7 +844,7 @@ - + - + - + @@ -594,9 +594,9 @@ - + - + From 804589eb30f616314c9d9e01c0681312548e2777 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:23:35 +0200 Subject: [PATCH 058/166] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From b6b58bba281176ea534c75cee5a440b99ac4908c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:25:37 +0200 Subject: [PATCH 059/166] reverting --- .../eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv index 0c1fd9e64..6a5fc3f87 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu.dnetlib.dhp.doiboost.mappings/crossref_mapping.csv @@ -36,10 +36,10 @@ 
article-number,String,No,,N/A, published-print,Partial Date,No,Date on which the work was published in print,"Result/relevantDate with Qualifier(""published-print"", ""dnet:dataCite_date"")", published-online,Partial Date,No,Date on which the work was published online,"Result/relevantDate with Qualifier(""published-online"", ""dnet:dataCite_date"")", subject,Array of String,No,"Subject category names, a controlled vocabulary from Sci-Val. Available for most journal articles","Result/subject with Qualifier(""keywords"", ""dnet:subject_classification_typologies""). ","Future improvements: map the controlled vocabulary instead of using the generic ""keywords"" qualifier" -issn,Array of String,No,,"Publication/Journal/issn +ISSN,Array of String,No,,"Publication/Journal/issn Publication/Journal/lissn Publication/Journal/eissn",The mapping depends on the value of issn-type -issn-type,Array of issn with Type,No,List of ISSNs with issn type information,N/A,Its value guides the setting of the properties in Journal (see row above) +issn-type,Array of ISSN with Type,No,List of ISSNs with ISSN type information,N/A,Its value guides the setting of the properties in Journal (see row above) ISBN,Array of String,No,,Publication/source,"In case of Book We can map ISBN and container title on Publication/source using this syntax container-title + ""ISBN: "" + ISBN" archive,Array of String,No,,N/A, license,Array of License,No,,Result/Instance/License, From c6a2a780a9b181395ae455a661ff620ecbcb6f22 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:30:17 +0200 Subject: [PATCH 060/166] reverting --- .../src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index 4db5fd463..ba47aaf5c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From 0e1a6bec20edf4995f9ccb9825907f3a545f3817 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:32:29 +0200 Subject: [PATCH 061/166] reverting --- .../resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml index b45fa1edb..95457fb70 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/oaf_claim_dedup.xml 
@@ -16,7 +16,7 @@ Doğuş Üniversitesi, Fen Edebiyat Fakültesi, Fizik Bölümü TR3959 urn:issn:1748-0221 - VOLUME=7;ISSUE=1;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSUE=1;ISSN=1748-0221;TITLE=Journal of Instrumentation ATLAS Collaboration Mitsou, Vasiliki Fiorini, Luca Ros Martínez, Eduardo Castillo Giménez, María Victoria Fuster Verdú, Juan A. García García, Carmen Cabrera Urbán, Susana Martí García, Salvador Salt Cairols, José Lacasta Llácer, Carlos Valls Ferrer, @@ -30,7 +30,7 @@ interactions. Journal of Instrumentation, 7(1). doi: 10.1088/1748-0221/7/01/P01013. UC Santa Cruz: Retrieved from: http://www.escholarship.org/uc/item/05j2j2br Journal of Instrumentation, 7 - VOLUME=7;issn=1748-0221;TITLE=Journal of Instrumentation + VOLUME=7;ISSN=1748-0221;TITLE=Journal of Instrumentation 1748-0221 Journal of Instrumentation 7, P01013 (2012). doi:10.1088/1748-0221/7/01/P01013 From 8ad7c7141702b3a1638d7d1abe26889f300ea480 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:36:12 +0200 Subject: [PATCH 062/166] reverting --- .../dhp/sx/graph/bio/pubmed/pubmed.xml | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml index 06ebe97cd..22da07e29 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/pubmed.xml @@ -16,7 +16,7 @@
- 0006-2944 + 0006-2944 13 2 @@ -182,7 +182,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -314,7 +314,7 @@
- 0006-291X + 0006-291X 66 4 @@ -460,7 +460,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -644,7 +644,7 @@
- 1090-2104 + 1090-2104 66 4 @@ -774,7 +774,7 @@
- 0006-291X + 0006-291X 66 4 @@ -934,7 +934,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -1614,7 +1614,7 @@
- 1873-2968 + 1873-2968 24 16 @@ -2017,7 +2017,7 @@
- 0006-2952 + 0006-2952 24 17 @@ -2185,7 +2185,7 @@
- 1873-2968 + 1873-2968 24 17 @@ -2337,7 +2337,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2585,7 +2585,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -2838,7 +2838,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3020,7 +3020,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3204,7 +3204,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3450,7 +3450,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3683,7 +3683,7 @@
- 0006-2952 + 0006-2952 24 18 @@ -3880,7 +3880,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4069,7 +4069,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4428,7 +4428,7 @@
- 0006-2952 + 0006-2952 24 20 @@ -4590,7 +4590,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4741,7 +4741,7 @@
- 0004-4172 + 0004-4172 25 9 @@ -4999,7 +4999,7 @@
- 0004-4172 + 0004-4172 25 9 From b9663298330212499fef14413b35ced9d336dc57 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:37:00 +0200 Subject: [PATCH 063/166] reverting --- .../src/test/resources/eu/dnetlib/dhp/transform/terms.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt index 30138b5bc..93cc00eca 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/transform/terms.txt @@ -1009,7 +1009,7 @@ datacite:id_typologies @=@ datacite:id_typologies @=@ EAN13 @=@ EAN13 datacite:id_typologies @=@ datacite:id_typologies @=@ EISSN @=@ EISSN datacite:id_typologies @=@ datacite:id_typologies @=@ Handle @=@ Handle datacite:id_typologies @=@ datacite:id_typologies @=@ ISBN @=@ ISBN -datacite:id_typologies @=@ datacite:id_typologies @=@ issn @=@ issn +datacite:id_typologies @=@ datacite:id_typologies @=@ ISSN @=@ ISSN datacite:id_typologies @=@ datacite:id_typologies @=@ ISTC @=@ ISTC datacite:id_typologies @=@ datacite:id_typologies @=@ LISSN @=@ LISSN datacite:id_typologies @=@ datacite:id_typologies @=@ LSID @=@ LSID From cc3d72df0e9dd8781e0ae8ae59b57032bca4098e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:01 +0200 Subject: [PATCH 064/166] removing not needed dependency --- dhp-common/pom.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index a6a57ce7b..c66c1d3bc 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -118,11 +118,7 @@ dhp-schemas - - org.apache.commons - commons-csv - 1.8 - + From 95e5482bbb35acc1ae269fb018017ec3f8240583 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:42:26 +0200 Subject: [PATCH 065/166] removing not needed dependency --- dhp-workflows/dhp-doiboost/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index ea8832754..385f87ad2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -82,12 +82,6 @@ org.apache.commons commons-text - - eu.dnetlib.dhp - dhp-aggregation - 1.2.4-SNAPSHOT - compile - From 785db1d5b2c26cf3e81bc9a38b54ec60c26764f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:44:07 +0200 Subject: [PATCH 066/166] refactoring --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java index 98b811bb6..89259d814 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java @@ -33,10 +33,15 @@ public class TestReadCSV { Assertions.assertEquals(36, beans.size()); Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions.assertTrue(beans.stream().anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); + Assertions + .assertTrue( + beans + .stream() + 
.anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); } + @Disabled @Test public void testCSVUrlUnibi() throws IOException { From 9650eea497b3d131fab34f37afab12c872a26afc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:45:48 +0200 Subject: [PATCH 067/166] reverting --- dhp-common/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml index c66c1d3bc..4c7810c47 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,12 +117,6 @@ eu.dnetlib.dhp dhp-schemas - - - - - - From e33daaeee89b253cde8968a8c33cad4b346e9aae Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:46:19 +0200 Subject: [PATCH 068/166] reverting --- dhp-workflows/dhp-doiboost/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-workflows/dhp-doiboost/pom.xml b/dhp-workflows/dhp-doiboost/pom.xml index 385f87ad2..f496ea9a2 100644 --- a/dhp-workflows/dhp-doiboost/pom.xml +++ b/dhp-workflows/dhp-doiboost/pom.xml @@ -84,6 +84,7 @@ + From ac417ca798b07e77765d12ffe8c3c3774b1bc111 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 17:50:33 +0200 Subject: [PATCH 069/166] removed not needed test resource --- .../dhp/doiboost/issn_gold_oa_version_4.csv | 73 ------------------- 1 file changed, 73 deletions(-) delete mode 100644 dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv deleted file mode 100644 index ebde1bf18..000000000 --- a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/dhp/doiboost/issn_gold_oa_version_4.csv +++ /dev/null @@ -1,73 +0,0 @@ -"issn","ISSN_L","ISSN_IN_DOAJ","ISSN_IN_ROAD","ISSN_IN_PMC","ISSN_IN_OAPC","ISSN_IN_WOS","ISSN_IN_SCOPUS","JOURNAL_IN_DOAJ","JOURNAL_IN_ROAD","JOURNAL_IN_PMC","JOURNAL_IN_OAPC","JOURNAL_IN_WOS","JOURNAL_IN_SCOPUS","TITLE","TITLE_SOURCE" -"0001-625X","0001-625X",1,1,0,0,0,1,1,1,0,0,0,1,"Acta Mycologica","DOAJ" -"0002-0397","0002-0397",1,1,0,0,1,1,1,1,0,0,1,1,"Africa Spectrum","DOAJ" -"0003-2565","0003-2565",1,0,0,0,0,0,1,0,0,0,0,0,"Anali Pravnog Fakulteta u Beogradu","DOAJ" -"0003-424X","0003-424X",0,1,0,0,1,0,0,1,0,0,1,0,"Annales de zootechnie.","ROAD" -"0003-4827","0003-4827",0,1,0,0,0,1,0,1,0,0,0,1,"Annals of Iowa.","ROAD" -"0004-0592","0004-0592",1,1,0,0,1,1,1,1,0,0,1,1,"Archivos de Zootecnia","DOAJ" -"0004-282X","0004-282X",1,1,0,0,1,1,1,1,0,0,1,1,"Arquivos de Neuro-Psiquiatria","DOAJ" -"0006-3096","0006-3096",0,1,0,0,0,0,0,1,0,0,0,0,"Biologia.","ROAD" -"0006-8705","0006-8705",1,1,0,0,1,1,1,1,0,0,1,1,"Bragantia","DOAJ" -"0007-5124","0007-5124",0,1,0,0,1,0,0,1,1,0,1,1,"Experimental animals.","ROAD" -"0007-9502","0007-9502",0,1,0,0,0,0,0,1,0,0,0,0,"Caesaraugusta.","ROAD" -"0008-7386","0008-7386",1,1,0,0,0,1,1,1,0,0,0,1,"Časopis pro Moderní Filologii","DOAJ" -"0008-7629","0008-7629",1,0,0,0,0,0,1,0,0,0,0,0,"Catalogue and Index","DOAJ" -"0015-573X","0015-573X",0,1,0,0,0,0,0,1,0,0,0,0,"Folia quaternaria.","ROAD" -"0016-6987","0016-6987",1,0,0,0,1,1,1,0,0,0,1,1,"Genus","DOAJ" -"0016-7789","0016-7789",1,1,0,0,0,1,1,1,0,0,0,1,"Geologija ","DOAJ" -"0021-5007","0021-5007",0,1,0,0,0,1,0,1,0,0,0,1,"Nihon Seitai Gakkaishi.","ROAD" 
-"0023-4001","0023-4001",0,1,0,0,1,1,0,1,0,0,1,1,"Korean Journal of Parasitology","ROAD" -"0023-5415","0023-5415",1,1,0,0,0,0,1,1,0,0,0,0,"Kunst og Kultur","DOAJ" -"0026-1165","0026-1165",1,0,0,0,1,1,1,0,0,0,1,1,"Journal of the Meteorological Society of Japan","DOAJ" -"0029-0181","0029-0181",0,1,0,0,0,0,0,1,0,0,0,0,"Nihon butsuri gakkaishi.","ROAD" -"0034-7000","0034-7000",1,1,0,0,0,1,1,1,0,0,0,1,"Revista Argentina de Cardiología","DOAJ" -"0034-7523","0034-7523",0,1,0,0,0,1,0,1,0,0,0,1,"Revista cubana de medicina.","ROAD" -"0034-8244","0034-8244",1,0,0,0,1,1,1,0,0,0,1,1,"Revista de Filosofia","DOAJ" -"0034-8678","0034-8678",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Pedagogie","DOAJ" -"0036-8709","0036-8709",1,1,1,0,1,1,1,1,1,0,1,1,"Scientia Pharmaceutica","DOAJ" -"0044-4855","0044-4855",0,1,0,0,0,0,0,1,0,0,0,0,"Život i škola.","ROAD" -"0048-7449","0048-7449",1,1,0,0,1,1,1,1,0,0,1,1,"Reumatismo","DOAJ" -"0048-766X","0048-766X",0,1,0,0,0,1,0,1,0,0,0,1,"Revista chilena de obstetricia y ginecología.","ROAD" -"0065-1400","0065-1400",0,1,0,0,1,1,0,1,0,0,1,1,"Acta Neurobiologiae Experimentalis.","ROAD" -"0066-6742","0066-6742",1,0,0,0,1,1,1,0,0,0,1,1,"Archivo Español de Arqueología","DOAJ" -"0073-2435","0073-2435",1,1,0,0,1,1,1,1,0,0,1,1,"Historia (Santiago)","DOAJ" -"0073-4918","0073-4918",0,1,0,0,0,0,0,1,0,0,0,0,"Illinois Natural History Survey bulletin.","ROAD" -"0075-7411","0075-7411",1,0,0,0,0,0,1,0,0,0,0,0,"Anales","DOAJ" -"0077-2704","0077-2704",0,1,0,0,0,0,0,1,0,0,0,0,"Namn och bygd.","ROAD" -"0078-5466","0078-5466",0,1,0,0,1,1,0,1,0,0,1,1,"Optica Applicata.","ROAD" -"0079-4929","0079-4929",1,1,0,0,0,0,1,1,0,0,0,0,"Právněhistorické studie","DOAJ" -"0100-3283","0100-3283",0,1,0,0,0,0,0,1,0,0,0,0,"Hansenologia Internationalis.","ROAD" -"0100-4042","0100-4042",1,1,0,0,1,1,1,1,0,0,1,1,"Química Nova","DOAJ" -"0100-8692","0100-8692",1,1,0,0,1,0,1,1,0,0,1,1,"Arquivos Brasileiros de Psicologia ","DOAJ" -"0102-4469","0102-4469",1,0,0,0,0,0,1,0,0,0,0,0,"Perspectiva Teológica","DOAJ" -"0102-6992","0102-6992",1,1,0,0,0,1,1,1,0,0,0,1,"Sociedade e Estado","DOAJ" -"0103-1570","0103-1570",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Sociedade & Natureza","DOAJ" -"0103-2070","0103-2070",1,1,0,0,1,1,1,1,0,0,1,1,"Tempo Social","DOAJ" -"0104-0588","0104-0588",1,1,0,0,1,0,1,1,0,0,1,0,"Revista de Estudos da Linguagem","DOAJ" -"0104-6497","0104-6497",1,1,0,0,1,0,1,1,0,0,1,0,"Nauplius","DOAJ" -"0104-8929","0104-8929",0,1,0,0,0,0,0,1,0,0,0,0,"Saeculum.","ROAD" -"0104-9496","0104-9496",1,0,0,0,0,0,1,0,0,0,0,0,"Revista do Direito","DOAJ" -"0120-0380","0120-0380",0,1,0,0,1,0,0,1,0,0,1,0,"Boletín de matemáticas.","ROAD" -"0120-100X","0120-100X",1,1,0,0,0,0,1,1,0,0,0,0,"Revista Ion","DOAJ" -"0120-4807","0120-4807",1,1,0,0,0,0,1,1,0,0,0,0,"Universitas Humanística","DOAJ" -"0121-4004","0121-4004",1,0,0,0,1,1,1,0,0,0,1,1,"Vitae","DOAJ" -"0121-4500","0121-4500",1,1,0,0,0,0,1,1,0,0,0,0,"Avances en Enfermería","DOAJ" -"0121-8697","0121-8697",1,0,0,0,0,0,1,0,0,0,0,0,"Revista de Derecho","DOAJ" -"0122-5197","0122-5197",0,1,0,0,0,1,0,1,0,0,0,1,"Memoria y Sociedad.","ROAD" -"0161-0457","0161-0457",1,0,1,1,1,1,1,0,1,1,1,1,"Scanning","DOAJ" -"0215-4706","0215-4706",1,1,0,0,0,0,1,1,0,0,0,0,"Floribunda.","ROAD" -"0324-6000","0324-6000",0,1,0,0,1,1,0,1,0,0,1,1,"Periodica polytechnica. 
Electrical engineering","ROAD" -"0325-187X","0325-187X",1,1,0,0,0,1,1,1,0,0,0,1,"Meteorologica","DOAJ" -"0326-7237","0326-7237",1,1,0,0,0,1,1,1,0,0,0,1,"Geoacta.","ROAD" -"0327-1676","0327-1676",0,1,0,0,0,0,1,1,0,0,0,0,"Andes","DOAJ" -"0327-2818","0327-2818",1,0,0,0,0,0,1,0,0,0,0,0,"Dominguezia","DOAJ" -"0327-5108","0327-5108",1,0,0,0,0,0,1,0,0,0,0,0,"Páginas de Filosofía","DOAJ" -"0327-585X","0327-585X",1,1,0,0,0,0,1,1,0,0,0,0,"Actualidad Económica","DOAJ" -"0327-6147","0327-6147",0,1,0,0,0,0,0,1,0,0,0,0,"Papeles de trabajo.","ROAD" -"0327-7763","0327-7763",0,1,0,0,0,0,0,1,0,0,0,0,"Revista del IICE.","ROAD" -"0327-9286","0327-9286",1,1,0,0,0,0,1,1,0,0,0,0,"Acta Toxicológica Argentina","DOAJ" -"0328-1205","0328-1205",1,1,0,0,1,1,1,1,0,0,1,1,"Synthesis (La Plata)","DOAJ" -"0329-5893","0329-5893",0,1,0,0,0,0,0,1,0,0,0,0,"Investigaciones en psicología..","ROAD" -"0329-8213","0329-8213",1,0,0,0,0,0,1,0,0,0,0,0,"Historia Regional","DOAJ" -"0332-5024","0332-5024",1,1,0,0,0,0,1,1,0,0,0,0,"Studia Musicologica Norvegica","DOAJ" -"0350-185X","0350-185X",1,1,0,0,0,0,1,1,0,0,0,0,"Južnoslovenski Filolog","DOAJ" From 08dd2b2102c3a8074c854839cc4b18f948d24b95 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 11 Aug 2021 18:09:41 +0200 Subject: [PATCH 070/166] moving the dependency version to the external pom file --- dhp-workflows/dhp-graph-mapper/pom.xml | 1 - pom.xml | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 665c01276..6c5e4609a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -125,7 +125,6 @@ com.opencsv opencsv - 5.5 diff --git a/pom.xml b/pom.xml index 433c88093..d31ffb88d 100644 --- a/pom.xml +++ b/pom.xml @@ -519,6 +519,12 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + From 8cdce59e0ebd826efd1b8845011ce3f2bf494e71 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:26 +0200 Subject: [PATCH 071/166] [graph raw] let the mapping exceptions propagate --- .../raw/AbstractMdRecordToOafMapper.java | 82 +++++++------------ .../raw/GenerateEntitiesApplication.java | 3 +- .../raw/GenerateEntitiesApplicationTest.java | 6 +- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 0 4 files changed, 36 insertions(+), 55 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java index 9aa4e4c31..526f45f6e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractMdRecordToOafMapper.java @@ -28,10 +28,7 @@ import java.util.Optional; import java.util.Set; import org.apache.commons.lang3.StringUtils; -import org.dom4j.Document; -import org.dom4j.DocumentFactory; -import org.dom4j.DocumentHelper; -import org.dom4j.Node; +import org.dom4j.*; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -112,43 +109,40 @@ public abstract class AbstractMdRecordToOafMapper { this.forceOriginalId = false; } - public List processMdRecord(final String xml) { - try { - DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); + public List 
processMdRecord(final String xml) throws DocumentException { - final Document doc = DocumentHelper - .parseText( - xml - .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) - .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); + DocumentFactory.getInstance().setXPathNamespaceURIs(nsContext); - final KeyValue collectedFrom = getProvenanceDatasource( - doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); + final Document doc = DocumentHelper + .parseText( + xml + .replaceAll(DATACITE_SCHEMA_KERNEL_4, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_4_SLASH, DATACITE_SCHEMA_KERNEL_3) + .replaceAll(DATACITE_SCHEMA_KERNEL_3_SLASH, DATACITE_SCHEMA_KERNEL_3)); - if (collectedFrom == null) { - return Lists.newArrayList(); - } + final KeyValue collectedFrom = getProvenanceDatasource( + doc, "//oaf:collectedFrom/@id", "//oaf:collectedFrom/@name"); - final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) - ? collectedFrom - : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); - - if (hostedBy == null) { - return Lists.newArrayList(); - } - - final DataInfo info = prepareDataInfo(doc, invisible); - final long lastUpdateTimestamp = new Date().getTime(); - - final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); - - final String type = getResultType(doc, instances); - - return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); - } catch (final Exception e) { - throw new RuntimeException(e); + if (collectedFrom == null) { + return Lists.newArrayList(); } + + final KeyValue hostedBy = StringUtils.isBlank(doc.valueOf("//oaf:hostedBy/@id")) + ? collectedFrom + : getProvenanceDatasource(doc, "//oaf:hostedBy/@id", "//oaf:hostedBy/@name"); + + if (hostedBy == null) { + return Lists.newArrayList(); + } + + final DataInfo info = prepareDataInfo(doc, invisible); + final long lastUpdateTimestamp = new Date().getTime(); + + final List instances = prepareInstances(doc, info, collectedFrom, hostedBy); + + final String type = getResultType(doc, instances); + + return createOafs(doc, type, instances, collectedFrom, info, lastUpdateTimestamp); } protected String getResultType(final Document doc, final List instances) { @@ -499,22 +493,6 @@ public abstract class AbstractMdRecordToOafMapper { return vocs.getTermAsQualifier(schemeId, classId); } - protected List prepareListStructProps( - final Node node, - final String xpath, - final String xpathClassId, - final String schemeId, - final DataInfo info) { - final List res = new ArrayList<>(); - - for (final Object o : node.selectNodes(xpath)) { - final Node n = (Node) o; - final String classId = n.valueOf(xpathClassId).trim(); - res.add(structuredProperty(n.getText(), prepareQualifier(classId, schemeId), info)); - } - return res; - } - protected List prepareListStructPropsWithValidQualifier( final Node node, final String xpath, diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java index 9027b49d7..6bb18c375 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplication.java @@ -17,6 +17,7 @@ import 
org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,7 +159,7 @@ public class GenerateEntitiesApplication { final String id, final String s, final boolean shouldHashId, - final VocabularyGroup vocs) { + final VocabularyGroup vocs) throws DocumentException { final String type = StringUtils.substringAfter(id, ":"); switch (type.toLowerCase()) { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java index 1e974dd69..67490a470 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/GenerateEntitiesApplicationTest.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.List; import org.apache.commons.io.IOUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -44,7 +45,7 @@ class GenerateEntitiesApplicationTest { } @Test - void testMergeResult() throws IOException { + void testMergeResult() throws IOException, DocumentException { Result publication = getResult("oaf_record.xml", Publication.class); Result dataset = getResult("odf_dataset.xml", Dataset.class); Result software = getResult("odf_software.xml", Software.class); @@ -76,7 +77,8 @@ class GenerateEntitiesApplicationTest { assertEquals(resultType, merge.getResulttype().getClassid()); } - protected Result getResult(String xmlFileName, Class clazz) throws IOException { + protected Result getResult(String xmlFileName, Class clazz) + throws IOException, DocumentException { final String xml = IOUtils.toString(getClass().getResourceAsStream(xmlFileName)); return new OdfToOafMapper(vocs, false, true) .processMdRecord(xml) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml new file mode 100644 index 000000000..e69de29bb From 86d940044c067f71ac0bc451885ecd329f117cc6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 12 Aug 2021 11:32:56 +0200 Subject: [PATCH 072/166] added test to verify bad records from FWF-E-Book-Library --- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 43 ++++++++----- .../dhp/oa/graph/raw/odf_fwfebooklibrary.xml | 61 +++++++++++++++++++ 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 000dbfe25..5228d1d02 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -13,6 +13,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; +import org.dom4j.DocumentException; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -48,7 +49,7 
@@ class MappersTest { } @Test - void testPublication() throws IOException { + void testPublication() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -145,7 +146,7 @@ class MappersTest { } @Test - void testPublication_PubMed() throws IOException { + void testPublication_PubMed() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record_pubmed.xml"))); @@ -217,7 +218,7 @@ class MappersTest { } @Test - void testPublicationInvisible() throws IOException { + void testPublicationInvisible() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_record.xml"))); @@ -233,7 +234,17 @@ class MappersTest { } @Test - void testDataset() throws IOException { + void testOdfFwfEBookLibrary() throws IOException { + final String xml = IOUtils + .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_fwfebooklibrary.xml"))); + + assertThrows( + IllegalArgumentException.class, + () -> new OdfToOafMapper(vocs, false, true).processMdRecord(xml)); + } + + @Test + void testDataset() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -346,7 +357,7 @@ class MappersTest { } @Test - void testOdfBielefeld() throws IOException { + void testOdfBielefeld() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_bielefeld.xml"))); @@ -394,7 +405,7 @@ class MappersTest { } @Test - void testOpentrial() throws IOException { + void testOpentrial() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_opentrial.xml"))); @@ -511,7 +522,7 @@ class MappersTest { } @Test - void testSoftware() throws IOException { + void testSoftware() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_software.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -530,7 +541,7 @@ class MappersTest { } @Test - void testClaimDedup() throws IOException { + void testClaimDedup() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_dedup.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -544,7 +555,7 @@ class MappersTest { } @Test - void testNakala() throws IOException { + void testNakala() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_nakala.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -572,7 +583,7 @@ class MappersTest { } @Test - void testEnermaps() throws IOException { + void testEnermaps() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("enermaps.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -597,7 +608,7 @@ class MappersTest { } @Test - void testClaimFromCrossref() throws IOException { + void testClaimFromCrossref() throws 
IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_claim_crossref.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -614,7 +625,7 @@ class MappersTest { } @Test - void testODFRecord() throws IOException { + void testODFRecord() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_record.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); System.out.println("***************"); @@ -628,7 +639,7 @@ class MappersTest { } @Test - void testTextGrid() throws IOException { + void testTextGrid() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("textgrid.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -662,7 +673,7 @@ class MappersTest { } @Test - void testBologna() throws IOException { + void testBologna() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf-bologna.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -679,7 +690,7 @@ class MappersTest { } @Test - void testJairo() throws IOException { + void testJairo() throws IOException, DocumentException { final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("oaf_jairo.xml"))); final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); @@ -703,7 +714,7 @@ class MappersTest { } @Test - void testOdfFromHdfs() throws IOException { + void testOdfFromHdfs() throws IOException, DocumentException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_from_hdfs.xml"))); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml index e69de29bb..cead018d1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_fwfebooklibrary.xml @@ -0,0 +1,61 @@ + + +
+ oai:e-book.fwf.ac.at:o:116 + 2020-04-08T13:25:21.742Z + text + 2021-08-12T00:16:55.365Z +
+ + + + 978-3-205-77704-5 + https://e-book.fwf.ac.at/o:116 + + + Although small and not particularly peoples both Chechens and Palestinians became famous for suicide bomber attacks in recent years. This can - partly - be explained by the unrecognised collective traumas of the past. Both Chechens and Palestinians experienced collective traumas in the 1940ties. The entire Chechen population wad deported by Josef Stalin to Kasakhstan, Kirgysia and Sibiria in February 1944 under the pretext of collaboration with the Third Reich. Those who survived were allowed to return in 1957 to Chechenya. Half of the Palestinian Arab population was expelled from Palestine in 1947/48, when fighting erupted between Jews and Arabs. The refugees were never allowed to return. The memory of the deportation/expulsion was kept alive. The founding traumas contributed to the development of Chechen and Palestinian nationalism. Chechens and Palestinians till today suffer from their collective traumas, which stayed unrecognised and therefore create psychological and political problems for the following generations - and for their adverseries. The phenomenon of the "closed circle of violence" created a phobic collective behaviour, which led for example Chechens to the illusionary declaration of independence in 1991. It also led to the individual overreaction of young Chechens or Palestinians, who became living bombs. The collective Trauma, if untreated, poses a threat to any peaceful political solution. + 1. Einleitung Ausgehend von der Fragestellung, warum gerade bei Tschetschenen und Palästinensern der Selbstmordterrorismus in den letzten Jahren so populär geworden ist, analysiert die Autorin die Geschichte dieser beiden Völker. Einer der Gründe ist bisher wenig beachtet worden. Der Einfluss eines kollektiven Traumas, das als solches nicht anerkannt, behandelt und auch nicht einer politischen Lösung zugeführt wurde. 2. Geschichte der Palästinenser und Tschetschenen Im Zuge der Errichtung Israels im Unabhängigkeitskrieg 1948 verlor die Hälfte des palästinensischen Volkes - 750.000 Menschen - ihre Heimat. Unter der Führung von Jassir Arafat kämpften sie in den Jahrzehnten danach - mit Gewalt und am Verhandlungstisch - um einen eigenen Staat. Das Recht auf Rückkehr spielte dabei immer eine besondere Rolle. Die "Nakbah", die als Katastrophe empfundene Vertreibung 1948, wurde dabei Bezugs- und Angelpunkt mehrer Generationen von Flüchtlingen. Die Weigerung Israels, die Mitverantwortung für die Vertreibung der Palästinenser zu übernehmen und das kollektive Trauma der Palästinenser anzuerkennen - aus Angst vor einer Infragestellung des eigenen Staates - ist einer der Gründe, warum der Nahostkonflikt bisher nicht gelöst werden konnte. Auch die Tschetschenen durften jahrzehntelang über die Deportation ihres Volkes nicht einmal sprechen. Hatte Josef Stallin sie erst unter dem Vorwand der Kollaboration mit Nazi-Deutschland deportiert, waren sie zwar nach seinem Tod in die Heimat zurückgekehrt, lebten dort aber jahrzehntelang weiterhin als "unzuverlässiges Volk". Das kollektive Trauma der Deportation konnte nur mündlich überliefert werden. Mit dem Zusammenbruch der Sowjetunion brach der ungelöste Konflikt zwischen Tschetschenien und Russland sofort auf, das Land ging in blutigen Kriegen unter. 3. Zusammenfassung Die kollektive Erinnerung ist in den vergangenen Jahrzehnten zu einem zentralen Forschungsthema geworden. 
Der vorsichtige Einsatz von in der Individualpsychologie gewonnenen Erkenntnissen in der Behandlung von kollektiven Traumata, um zu einer politischen Lösung zu kommen, ist eine Chance. Das Studium historischer Fakten in Kombination mit den Erkenntnissen der Psychologie und Psychiatrie bietet die Basis für eine politische Lösung. Die vorliegende Arbeit zeigt, dass kollektive Traumata, die nicht behandelt werden, immer wieder, auch Generationen später, zu kollektiven Reaktionen führen können, die auf den ersten Blick irrational erscheinen. Die vielleicht radikalste Form des politischen Widerstandes, das Selbstmordattentat, ist dafür ein Beispiel. + deu/ger + Böhlau + application/pdf + + Trauma und Terror: Zum palästinensischen und tschetschenischen Nationalismus + + + + Szyszkowitz, Tessa + Tessa + Szyszkowitz + + + + Trauma, Terror, Palestinians, Suicide attacks, Recognition, Chechens + ÖFOS 2002, Contemporary history + BIC Standard Subject Categories, Postwar 20th century history, from c 1945 to c 2000 (HBLW3) + ÖFOS 2002, Zeitgeschichte + + + 14.02 MB + + + 2007 + + + literature + 2007-01-01 + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + http://creativecommons.org/licenses/by-nc-nd/3.0/at/ + deu/ger + fwf_________::D 3929 + + + https://fedora.e-book.fwf.ac.at/fedora/get/o:116/bdef:Content/download + +
\ No newline at end of file From 6e84b3951f685061cc61e71c124cf4a7518ccee2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:57:41 +0200 Subject: [PATCH 073/166] GetCSV refactoring - moving classes to dhp-common that have dependency with GetCSV class (that was located in graph-mapper) --- .../common/aggregation}/AggregatorReport.java | 2 +- .../collection/CollectorException.java | 2 +- .../dnetlib/dhp/common/collection/GetCSV.java | 65 +++++++++++++++++++ .../common}/collection/HttpClientParams.java | 2 +- .../common}/collection/HttpConnector2.java | 4 +- 5 files changed, 70 insertions(+), 5 deletions(-) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common => dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation}/AggregatorReport.java (96%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/CollectorException.java (93%) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpClientParams.java (97%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpConnector2.java (98%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java index 8e46ab92b..c5926848e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.aggregation.common; +package eu.dnetlib.dhp.common.aggregation; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java index 144d297e6..5d94c2f89 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; public class CollectorException extends Exception { diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java new file mode 100644 index 000000000..44f4121eb --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -0,0 +1,65 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.net.URL; +import java.net.URLConnection; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import 
org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.opencsv.bean.CsvToBeanBuilder; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class GetCSV { + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + + } + + public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass, char delimiter) throws IOException, ClassNotFoundException { + + Path hdfsWritePath = new Path(hdfsPath); + FSDataOutputStream fsDataOutputStream = null; + if (fileSystem.exists(hdfsWritePath)) { + fileSystem.delete(hdfsWritePath, false); + } + fsDataOutputStream = fileSystem.create(hdfsWritePath); + + try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + + ObjectMapper mapper = new ObjectMapper(); + + new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse() + .forEach(line -> { + try { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index ab0d5cc02..6fcec00dd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; /** * Bundles the http connection parameters driving the client behaviour. 
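For orientation, a minimal usage sketch of the relocated helper (not part of the patch: the name node URI and target path are placeholders, the URL and model class are the ones exercised by the tests added later in this series, and throws clauses are omitted):

    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://nameservice1");   // placeholder name node URI
    FileSystem fileSystem = FileSystem.get(conf);
    String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
        // one JSON object per CSV row is written to the given HDFS path; ',' is the column delimiter
        GetCSV.getCsv(fileSystem, reader, "/tmp/unibi_gold", "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ',');
    }
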
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 8493a3436..724f5f0e1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; import static eu.dnetlib.dhp.utils.DHPUtils.*; @@ -15,7 +15,7 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java From bfe8f5335c733177b338b54f1e63936c62b362cb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:14 +0200 Subject: [PATCH 074/166] GetCSV refactoring - copied model classes in test path --- .../collection/models/CSVProgramme.java | 80 +++++++++++++++++ .../common/collection/models/CSVProject.java | 90 +++++++++++++++++++ .../common/collection/models/DOAJModel.java | 52 +++++++++++ .../collection/models/UnibiGoldModel.java | 45 ++++++++++ 4 files changed, 267 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java new file mode 100644 index 000000000..d17fcc75e --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java @@ -0,0 +1,80 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; +import com.opencsv.bean.CsvIgnore; + +/** + * The model for the programme csv file + */ +public class CSVProgramme implements Serializable { + + @CsvBindByName(column = "code") + private String code; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "shortTitle") + private String shortTitle; + + @CsvBindByName(column = "language") + private String language; + + @CsvIgnore + private String classification; + + @CsvIgnore + private String classification_short; + + public String getClassification_short() { + return classification_short; + } + + public void setClassification_short(String classification_short) { + this.classification_short = classification_short; + } + + public String getClassification() { + return classification; + } + + public void setClassification(String classification) { + this.classification = classification; + } + + public String getCode() { + return code; + } + + public void setCode(String code) { + this.code = code; + } + 
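+	// Illustrative sketch (not part of this class): how opencsv binds a header row to this bean.
+	// The header names must match the @CsvBindByName columns; the sample line is made up.
+	//
+	//   String csv = "code;title;shortTitle;language\n"
+	//       + "H2020-EU.1.1.;Excellent Science;ERC;en";
+	//   List<CSVProgramme> rows = new CsvToBeanBuilder<CSVProgramme>(new StringReader(csv))
+	//       .withType(CSVProgramme.class)
+	//       .withSeparator(';')
+	//       .build()
+	//       .parse();
+	//   rows.get(0).getCode();   // -> "H2020-EU.1.1."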
+ public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getShortTitle() { + return shortTitle; + } + + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } + + public String getLanguage() { + return language; + } + + public void setLanguage(String language) { + this.language = language; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java new file mode 100644 index 000000000..19606a09e --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -0,0 +1,90 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "status") + private String status; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + @CsvBindByName(column = "title") + private String title; + + @CsvBindByName(column = "call") + private String call; + + @CsvBindByName(column = "subjects") + private String subjects; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getCall() { + return call; + } + + public void setCall(String call) { + this.call = call; + } + + public String getSubjects() { + return subjects; + } + + public void setSubjects(String subjects) { + this.subjects = subjects; + } + +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java new file mode 100644 index 000000000..156ba3632 --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") + private String journalTitle; + + @CsvBindByName(column = "Journal ISSN (print version)") + private String issn; + + @CsvBindByName(column = "Journal EISSN (online version)") + private String eissn; + + @CsvBindByName(column = "Review process") + private String reviewProcess; + + public String getJournalTitle() { + return journalTitle; + } + + public void setJournalTitle(String journalTitle) { + this.journalTitle = journalTitle; + } + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getEissn() { + return eissn; + } + + public void setEissn(String eissn) { + this.eissn = eissn; + } + + public String getReviewProcess() { + return 
reviewProcess; + } + + public void setReviewProcess(String reviewProcess) { + this.reviewProcess = reviewProcess; + } +} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java new file mode 100644 index 000000000..ae47262bf --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java @@ -0,0 +1,45 @@ + +package eu.dnetlib.dhp.common.collection.models; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") + private String issn; + @CsvBindByName(column = "ISSN_L") + private String issn_l; + @CsvBindByName(column = "TITLE") + private String title; + @CsvBindByName(column = "TITLE_SOURCE") + private String title_source; + + public String getIssn() { + return issn; + } + + public void setIssn(String issn) { + this.issn = issn; + } + + public String getIssn_l() { + return issn_l; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getTitle_source() { + return title_source; + } + + public void setTitle_source(String title_source) { + this.title_source = title_source; + } +} From 733bcaecf621657dfbaf1aec886b3071216dd924 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:58:52 +0200 Subject: [PATCH 075/166] GetCSV refactoring - added test class (all the tests are disabled since they refer to remote resource) --- .../dhp/common/collection/GetCSVTest.java | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java new file mode 100644 index 000000000..bf5e3dedb --- /dev/null +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -0,0 +1,245 @@ + +package eu.dnetlib.dhp.common.collection; + +import java.io.*; +import java.nio.file.Files; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.common.collection.models.CSVProgramme; +import eu.dnetlib.dhp.common.collection.models.CSVProject; +import eu.dnetlib.dhp.common.collection.models.DOAJModel; +import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; + +public class GetCSVTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + 
"eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + Assertions + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + Assertions + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + Assertions.assertTrue(csvp.getShortTitle().equals("")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + Assertions + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + Assertions.assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + Assertions + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + Assertions + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + Assertions.assertTrue(csvp.getLanguage().equals("en")); + } + Assertions.assertTrue(csvp.getCode() != null); + Assertions.assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + // new BufferedReader(new FileReader(fileURL)) + , workingDir + "/projects", + "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + Assertions.assertTrue(csvp.getId().equals("771736")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + Assertions.assertTrue(csvp.getId().equals("752903")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + Assertions.assertTrue(csvp.getId().equals("861952")); + Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + Assertions.assertTrue(csvp.getId() != null); + Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } 
+ + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); + Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); + Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); + Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); + Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); + Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); + Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); + Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); + Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + + } + if (count == 7902) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); + Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + } + if (count == 16703) { + + Assertions.assertTrue(doaj.getIssn().equals("")); + Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions + .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + } + + count += 1; + } + + Assertions.assertEquals(16709, count); + } + +} From 7402daf51a34df27fa303c53143ed61ed5c48e3e Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:59:19 +0200 Subject: [PATCH 076/166] GetCSV refactoring - added dependency to open-csv lib --- dhp-common/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dhp-common/pom.xml 
b/dhp-common/pom.xml index 4c7810c47..c057123b1 100644 --- a/dhp-common/pom.xml +++ b/dhp-common/pom.xml @@ -117,6 +117,11 @@ eu.dnetlib.dhp dhp-schemas + + + com.opencsv + opencsv + From d36e925277b71168a0dcb440a2f5e4d1624d5359 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:00:21 +0200 Subject: [PATCH 077/166] GetCSV refactoring - moved under model package --- .../dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/EXCELTopic.java (97%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java index 5607df118..fa2e3422e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; From b62cd656a78b763c0db77dd9f625bb2cfab99937 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:01:10 +0200 Subject: [PATCH 078/166] GetCSV refactoring - changed the model to store only the information needed --- .../utils/{ => model}/CSVProgramme.java | 24 +++++---- .../project/utils/model/CSVProject.java | 51 +++++++++++++++++++ 2 files changed, 64 insertions(+), 11 deletions(-) rename dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/{ => model}/CSVProgramme.java (77%) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java similarity index 77% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index d486f0104..ea89a75eb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -1,20 +1,31 @@ -package eu.dnetlib.dhp.actionmanager.project.utils; +package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; +import com.opencsv.bean.CsvBindByName; + /** * The model for the programme csv file */ public class CSVProgramme implements Serializable { - private String rcn; + @CsvBindByName(column = "code") private String code; + @CsvBindByName(column = "title") private String title; + + @CsvBindByName(column = "shortTitle") private String shortTitle; + + @CsvBindByName(column = "language") private String language; + + @CsvBindByName(column = 
"classification") private String classification; + + @CsvBindByName(column = "classification_short") private String classification_short; public String getClassification_short() { @@ -33,14 +44,6 @@ public class CSVProgramme implements Serializable { this.classification = classification; } - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - public String getCode() { return code; } @@ -73,5 +76,4 @@ public class CSVProgramme implements Serializable { this.language = language; } -// } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java new file mode 100644 index 000000000..73cea0539 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.dhp.actionmanager.project.utils.model; + +import java.io.Serializable; + +import com.opencsv.bean.CsvBindByName; + +/** + * the mmodel for the projects csv file + */ +public class CSVProject implements Serializable { + + @CsvBindByName(column = "id") + private String id; + + @CsvBindByName(column = "programme") + private String programme; + + @CsvBindByName(column = "topics") + private String topics; + + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + + + public String getProgramme() { + return programme; + } + + public void setProgramme(String programme) { + this.programme = programme; + } + + public String getTopics() { + return topics; + } + + public void setTopics(String topics) { + this.topics = topics; + } + + + +} From 4317211a2b5e64ad9e4a969642ab674182854593 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:14 +0200 Subject: [PATCH 079/166] GetCSV refactoring - refactoring due to movement --- .../dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 ++ .../eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 5ce730692..95c6f987a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; + /** * Reads a generic excel file and maps it into classes that mirror its schema */ diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 359e46fc7..9e73cbc37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -16,8 +16,8 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import 
com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of an excel file and writes the Serialization of it in hdfs From e9fc3ef3bc4f83ab6e0e93188a5782cefdea72d3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:03:41 +0200 Subject: [PATCH 080/166] GetCSV refactoring - changed to use the new class to get and write the csv file --- .../actionmanager/project/utils/ReadCSV.java | 72 +++---------------- 1 file changed, 9 insertions(+), 63 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java index 1ae775bec..c967d4cae 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java @@ -1,36 +1,21 @@ package eu.dnetlib.dhp.actionmanager.project.utils; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; +import java.io.*; import java.util.Optional; import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; /** * Applies the parsing of a csv file and writes the Serialization of it in hdfs */ -public class ReadCSV implements Closeable { - private static final Log log = LogFactory.getLog(ReadCSV.class); - - private final BufferedWriter writer; - private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private final String csvFile; - private final char delimiter; +public class ReadCSV { public static void main(final String[] args) throws Exception { final ArgumentApplicationParser parser = new ArgumentApplicationParser( @@ -50,57 +35,18 @@ public class ReadCSV implements Closeable { char del = ';'; if (delimiter.isPresent()) del = delimiter.get().charAt(0); - try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) { - log.info("Getting CSV file..."); - readCSV.execute(classForName); - } - - } - - public void execute(final String classForName) - throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException { - CSVParser csvParser = new CSVParser(); - csvParser - .parse(csvFile, classForName, delimiter) - .stream() - .forEach(this::write); - } - - @Override - public void close() throws IOException { - writer.close(); - } - - public ReadCSV( - final String hdfsPath, - final String hdfsNameNode, - final String fileURL, - char delimiter) - throws Exception { Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); - HttpConnector2 httpConnector = new HttpConnector2(); + 
FileSystem fileSystem = FileSystem.get(conf); - Path hdfsWritePath = new Path(hdfsPath); + BufferedReader reader = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))); - if (fileSystem.exists(hdfsWritePath)) { - fileSystem.delete(hdfsWritePath, false); - } - final FSDataOutputStream fos = fileSystem.create(hdfsWritePath); + GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del); - this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8)); - this.csvFile = httpConnector.getInputSource(fileURL); - this.delimiter = delimiter; - } + reader.close(); - protected void write(final Object p) { - try { - writer.write(OBJECT_MAPPER.writeValueAsString(p)); - writer.newLine(); - } catch (final Exception e) { - throw new RuntimeException(e); - } } } From 7a789423aa37fa3f207f317b2f9181c4420c3fe5 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:27 +0200 Subject: [PATCH 081/166] GetCSV refactoring - refactoring due to movement of classes --- .../eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java | 2 +- .../eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index 40cf5ee53..686b0fc7f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index b1a381415..8efc76a0e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import scala.Tuple2; From f0845e9865b5714c8e1f14ffed9eddfc4b3cafd6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:04:58 +0200 Subject: [PATCH 082/166] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/SparkAtomicActionJob.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index a4a0bf6a4..cc1411b31 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.Arrays; -import java.util.HashMap; import java.util.Objects; import java.util.Optional; @@ -22,9 +21,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; -import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.schema.action.AtomicAction; From 335a824e348c9e1eaf81d6c021f48323a7e599d6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:10:10 +0200 Subject: [PATCH 083/166] GetCSV refactoring - fixed issue --- .../eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 95c6f987a..a520176f4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -32,7 +32,7 @@ public class EXCELParser { XSSFSheet sheet = wb.getSheet(sheetName); - if (sheetName == null) { + if (sheet == null) { throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file"); } From ab8abd61bbf29b6f6c1ae93b51eb63fc3f219ab8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:11:07 +0200 Subject: [PATCH 084/166] GetCSV refactoring - refactoring due to movement of classes --- .../dhp/actionmanager/project/oozie_app/workflow.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index e4f2715fb..bd864a6aa 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -58,7 +58,7 @@ --hdfsNameNode${nameNode} --fileURL${projectFileURL} --hdfsPath${workingDir}/projects - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject @@ -70,7 +70,7 @@ --hdfsNameNode${nameNode} --fileURL${programmeFileURL} --hdfsPath${workingDir}/programme - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme @@ -83,7 +83,7 @@ --fileURL${topicFileURL} 
--hdfsPath${workingDir}/topic --sheetName${sheetName} - --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic + --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic From 9da74b544adf252f84cec924598d88e71293b015 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:15 +0200 Subject: [PATCH 085/166] GetCSV refactoring - refactoring due to movement of classes --- .../dnetlib/dhp/actionmanager/project/EXCELParserTest.java | 6 +++--- .../actionmanager/project/PrepareH2020ProgrammeTest.java | 2 +- .../dhp/actionmanager/project/PrepareProjectTest.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index cc36421a0..d27a732ea 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -13,8 +13,8 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.actionmanager.project.utils.EXCELParser; -import eu.dnetlib.dhp.collection.CollectorException; -import eu.dnetlib.dhp.collection.HttpConnector2; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpConnector2; @Disabled public class EXCELParserTest { @@ -25,7 +25,7 @@ public class EXCELParserTest { @BeforeAll public static void beforeAll() throws IOException { - workingDir = Files.createTempDirectory(CSVParserTest.class.getSimpleName()); + workingDir = Files.createTempDirectory(EXCELParserTest.class.getSimpleName()); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index f128b5610..680872126 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; public class PrepareH2020ProgrammeTest { diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java index f0f3532aa..ee5f78b0c 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java @@ -21,7 +21,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; public class PrepareProjectTest { From d57b2bb9273c33d3348a3abf282245eaec1e2ae2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:12:51 +0200 Subject: [PATCH 086/166] GetCSV refactoring - removing 
not needed dependency --- dhp-workflows/dhp-aggregation/pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml index 294287008..98e22d8a3 100644 --- a/dhp-workflows/dhp-aggregation/pom.xml +++ b/dhp-workflows/dhp-aggregation/pom.xml @@ -84,14 +84,6 @@ json - - - org.apache.commons - commons-csv - 1.8 - - - org.apache.poi From 6b9e1bf2e3787bb5eb802508721a174981b7bffe Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:17:50 +0200 Subject: [PATCH 087/166] GetCSV refactoring - removing not needed dependency --- .../java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java index 549169673..dbf053a72 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java @@ -6,6 +6,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; + public abstract class ReportingJob { /** From 8769dd8eef6668bbce2e238175f2d36948c2b258 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 18:20:56 +0200 Subject: [PATCH 088/166] GetCSV refactoring - refactoring due to movement of classes --- .../java/eu/dnetlib/dhp/collection/CollectorWorker.java | 4 +++- .../dnetlib/dhp/collection/CollectorWorkerApplication.java | 4 +++- .../eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java | 4 ++-- .../collection/plugin/mongodb/MDStoreCollectorPlugin.java | 4 ++-- .../plugin/mongodb/MongoDbDumpCollectorPlugin.java | 4 ++-- .../dhp/collection/plugin/oai/OaiCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 6 +++--- .../dhp/collection/plugin/oai/OaiIteratorFactory.java | 6 +++--- .../dhp/collection/plugin/rest/RestCollectorPlugin.java | 6 +++--- .../eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java | 4 ++-- .../dnetlib/dhp/transformation/TransformSparkJobNode.java | 2 +- .../eu/dnetlib/dhp/collection/CollectionWorkflowTest.java | 1 + .../dhp/collection/plugin/rest/RestCollectorPluginTest.java | 6 +++--- .../dhp/collection/plugin/rest/RestIteratorTest.java | 2 +- 14 files changed, 32 insertions(+), 27 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index d0872da1d..2ea3f35cc 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; @@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin; import 
eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; public class CollectorWorker extends ReportingJob { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java index 545cbab0c..708d2da65 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java @@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 457f63468..841d42fea 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin; import java.util.stream.Stream; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public interface CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index 549c59720..ad28a7261 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -13,11 +13,11 @@ import org.slf4j.LoggerFactory; import com.mongodb.client.MongoCollection; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.common.MdstoreClient; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; public class MDStoreCollectorPlugin implements CollectorPlugin { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
index ec5bab448..036404141 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
@@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
 import eu.dnetlib.dhp.utils.DHPUtils;
 public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
index 9918e4abe..878e286e0 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
@@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 public class OaiCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
index 331dee6b4..3f767fd31 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@@ -19,10 +19,10 @@ import org.dom4j.io.XMLWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpConnector2;
 import eu.dnetlib.dhp.collection.XmlCleaner;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
 public class OaiIterator implements Iterator {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
index 48f6a94c8..1838223c2 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
@@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai;
 import java.util.Iterator;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.HttpClientParams;
-import eu.dnetlib.dhp.collection.HttpConnector2;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
 public class OaiIteratorFactory {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
index be2bbcece..997948687 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
@@ -9,11 +9,11 @@ import java.util.stream.StreamSupport;
 import org.apache.commons.lang3.StringUtils;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 /**
  * TODO: delegate HTTP requests to the common HttpConnector2 implementation.
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
index a90d259b4..64a041fd4 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@@ -30,9 +30,9 @@ import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
 import eu.dnetlib.dhp.collection.JsonUtils;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 /**
  * log.info(...) equal to log.trace(...) in the application-logs
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
index a01703675..4fe79bf76 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
@@ -23,8 +23,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
 import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
 import eu.dnetlib.dhp.message.MessageSender;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java
index 05dd6b1d3..1a10b5f64 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionWorkflowTest.java
@@ -18,6 +18,7 @@ import org.slf4j.LoggerFactory;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
 @TestMethodOrder(MethodOrderer.OrderAnnotation.class)
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java
index f2b873e10..f708c367b 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPluginTest.java
@@ -12,10 +12,10 @@ import org.junit.jupiter.api.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
 import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 /**
  * @author js, Andreas Czerniak
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java
index 9f75bd468..906f69dc9 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/RestIteratorTest.java
@@ -9,7 +9,7 @@ import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.collection.HttpClientParams;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
 /**
  *

From ed183d878e5899aa998d3dd6b0de0ffa10353752 Mon Sep 17 00:00:00 2001
From: "miriam.baglioni"
Date: Fri, 13 Aug 2021 09:28:51 +0200
Subject: [PATCH 089/166] GetCSV refactoring - modified test classes due to change in the model of projects and programme

---
 .../project/PrepareH2020ProgrammeTest.java | 2 +-
 .../project/PrepareProjectTest.java | 1 +
 .../project/SparkUpdateProjectTest.java | 2 +-
 .../project/preparedProgramme_whole.json | 277 ++++++++++++++++++
 .../project/prepared_projects.json | 34 +--
 .../project/projects_subset.json | 32 +-
 .../project/whole_programme.json.gz | Bin 31593 -> 29456 bytes
 7 files changed, 313 insertions(+), 35 deletions(-)
 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json

diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
index 680872126..68aacdc8b 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java
@@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest {
 Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count());
- // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme)));
+ //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme)));
 Assertions
 .assertEquals(
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java
index ee5f78b0c..d0c50a054 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjectTest.java
@@ -9,6 +9,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.ForeachFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java
index a77dbace4..58365e026 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/SparkUpdateProjectTest.java
@@ -78,7 +78,7 @@ public class SparkUpdateProjectTest {
 "-programmePath",
 getClass()
 .getResource(
- "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz")
+ "/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json")
 .getPath(),
 "-projectPath",
 getClass().getResource("/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json").getPath(),
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json
new file mode 100644
index
000000000..e8e04e8db --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json @@ -0,0 +1,277 @@ +{"code":"H2020-EU.5.g.","title":"Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts","classification_short":"Science with and for Society | Take due and proportional precautions in research and innovation activities by anticipating and assessing potential environmental, health and safety impacts"} +{"code":"H2020-EU.3.4.2.1.","title":"A substantial reduction of traffic congestion","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | A substantial reduction of traffic congestion","classification_short":"Societal Challenges | Transport | Mobility, safety and security | A substantial reduction of traffic congestion"} +{"code":"H2020-EU.3.4.5.4.","title":"ITD Airframe","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Airframe","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Airframe"} +{"code":"H2020-EU.3.3.8.1.","title":"Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the electrical efficiency and the durability of the different fuel cells used for power production to levels which can compete with conventional technologies, while reducing costs"} +{"code":"H2020-EU.3.7.1.","title":"Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs","classification_short":"Societal Challenges | Secure societies | Fight crime, illegal trafficking and terrorism, including understanding and tackling terrorist ideas and beliefs"} +{"code":"H2020-EU.3.4.1.1.","title":"Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the 
environment | Making aircraft, vehicles and vessels cleaner and quieter will improve environmental performance and reduce perceived noise and vibration"} +{"code":"H2020-EU.1.4.3.","title":"Reinforcing European research infrastructure policy and international cooperation","shortTitle":"Research infrastructure policy and international cooperation","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation"} +{"code":"H2020-EU.1.4.","title":"EXCELLENT SCIENCE - Research Infrastructures","shortTitle":"Research Infrastructures","language":"en","classification":"Excellent science | Research Infrastructures","classification_short":"Excellent Science | Research Infrastructures"} +{"code":"H2020-EU.3.4.6.1.","title":"Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives) | Reduce the production cost of fuel cell systems to be used in transport applications, while increasing their lifetime to levels which can compete with conventional technologies"} +{"code":"H2020-EU.3.4.5.5.","title":"ITD Engines","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Engines","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Engines"} +{"code":"H2020-EU.2.1.1.7.3.","title":"Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Multi-disciplinary approaches for smart systems, supported by developments in holistic design and advanced manufacturing to realise self-reliant and adaptable smart systems having sophisticated interfaces and offering complex functionalities based on, for example, the seamless integration of sensing, actuating, processing, energy provision and networking"} +{"code":"H2020-EU.3.1.6.1.","title":"Promoting integrated 
care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Promoting integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Promoting integrated care"} +{"code":"H2020-EU.3.7.6.","title":"Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management","classification_short":"Societal Challenges | Secure societies | Ensure privacy and freedom, including in the Internet and enhance the societal, legal and ethical understanding of all areas of security, risk and management"} +{"code":"H2020-EU.3.4.2.3.","title":"Developing new concepts of freight transport and logistics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Developing new concepts of freight transport and logistics","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Developing new concepts of freight transport and logistics"} +{"code":"H2020-EU.3.3.2.1.","title":"Develop the full potential of wind energy","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop the full potential of wind energy"} +{"code":"H2020-EU.3.2.5.","title":"Cross-cutting marine and maritime research","shortTitle":"Cross-cutting marine and maritime research","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research"} +{"code":"H2020-EU.3.4.7.","title":"SESAR JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU","classification_short":"Societal Challenges | Transport | SESAR JU"} +{"code":"H2020-EU.2.1.3.3.","title":"Management of materials components","shortTitle":"Management of materials components","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Management of materials components","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Management of materials components"} +{"code":"H2020-EU.3.3.3.","title":"Alternative fuels and mobile energy sources","shortTitle":"Alternative fuels and mobile energy sources","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources"} 
+{"code":"H2020-EU.7.","title":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","shortTitle":"European Institute of Innovation and Technology (EIT)","language":"en","classification":"THE EUROPEAN INSTITUTE OF INNOVATION AND TECHNOLOGY (EIT)","classification_short":"European Institute of Innovation and Technology (EIT)"} +{"code":"H2020-EU.3.5.4.1.","title":"Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Strengthen eco-innovative technologies, processes, services and products including exploring ways to reduce the quantities of raw materials in production and consumption, and overcoming barriers in this context and boost their market uptake"} +{"code":"H2020-EU.3.1.4.","title":"Active ageing and self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health"} +{"code":"H2020-EU.1.","title":"Excellent science","shortTitle":"Excellent Science","language":"en","classification":"Excellent science","classification_short":"Excellent Science"} +{"code":"H2020-EU.3.5.6.1.","title":"Identifying resilience levels via observations, monitoring and modelling","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Identifying resilience levels via observations, monitoring and modelling"} +{"code":"H2020-EU.3.2.4.3.","title":"Supporting market development for bio-based products and processes","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Supporting market development for bio-based products and processes","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Supporting market development for bio-based products and processes"} +{"code":"H2020-EU.2.1.6.1.","title":"Enabling European competitiveness, non-dependence and innovation of the European space sector","shortTitle":"Competitiveness, non-dependence and innovation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and 
innovation of the European space sector","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation"} +{"code":"H2020-EU.4.b.","title":"Twinning of research institutions","shortTitle":"Twinning of research institutions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Twinning of research institutions","classification_short":"Spreading excellence and widening participation | Twinning of research institutions"} +{"code":"H2020-EU.3.1.7.6.","title":"Psychiatric diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Psychiatric diseases"} +{"code":"H2020-EU.3.1.2.2.","title":"Improving diagnosis and prognosis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Improving diagnosis and prognosis","classification_short":"Societal Challenges | Health | Preventing disease | Improving diagnosis and prognosis"} +{"code":"H2020-EU.3.4.5.3.","title":"IADP Fast Rotorcraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Fast Rotorcraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Fast Rotorcraft"} +{"code":"H2020-EU.3.1.3.1.","title":"Treating disease, including developing regenerative medicine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Treating disease, including developing regenerative medicine","classification_short":"Societal Challenges | Health | Treating and managing disease | Treating disease, including developing regenerative medicine"} +{"code":"H2020-EU.3.4.3.3.","title":"Advanced production processes","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Advanced production processes","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Advanced production processes"} +{"code":"H2020-EU.3.1.7.","title":"Innovative Medicines Initiative 2 (IMI2)","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2)","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2)"} +{"code":"H2020-EU.3.6.3.2.","title":"Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research into European countries' and regions' history, literature, art, philosophy and religions and how these have informed contemporary European diversity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research into European countries' and regions' history, literature, art, philosophy 
and religions and how these have informed contemporary European diversity"} +{"code":"H2020-EU.3.5.1.2.","title":"Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Assess impacts, vulnerabilities and develop innovative cost-effective adaptation and risk prevention and management measures"} +{"code":"H2020-EU.3.6.1.","title":"Inclusive societies","shortTitle":"Inclusive societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies"} +{"code":"H2020-EU.3.2.","title":"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","shortTitle":"Food, agriculture, forestry, marine research and bioeconomy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy"} +{"code":"H2020-EU.2.1.6.1.2.","title":"Boost innovation between space and non-space sectors","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Boost innovation between space and non-space sectors","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Boost innovation between space and non-space sectors"} +{"code":"H2020-EU.2.1.3.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Advanced materials","shortTitle":"Advanced materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials"} +{"code":"H2020-EU.2.1.2.3.","title":"Developing the societal dimension of nanotechnology","shortTitle":"Societal dimension of nanotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing the societal dimension of nanotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Societal dimension of nanotechnology"} +{"code":"H2020-EU.4.","title":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","shortTitle":"Spreading excellence and widening participation","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION","classification_short":"Spreading excellence and widening participation"} +{"code":"H2020-EU.3.6.1.2.","title":"Trusted 
organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Trusted organisations, practices, services and policies that are necessary to build resilient, inclusive, participatory, open and creative societies in Europe, in particular taking into account migration, integration and demographic change"} +{"code":"H2020-EU.3.4.2.","title":"Better mobility, less congestion, more safety and security","shortTitle":"Mobility, safety and security","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security","classification_short":"Societal Challenges | Transport | Mobility, safety and security"} +{"code":"H2020-EU.3.1.7.13.","title":"Other","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Other","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Other"} +{"code":"H2020-EU.3.3.3.3.","title":"New alternative fuels","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | New alternative fuels","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | New alternative fuels"} +{"code":"H2020-EU.2.1.3.5.","title":"Materials for creative industries, including heritage","shortTitle":"Materials for creative industries, including heritage","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for creative industries, including heritage","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for creative industries, including heritage"} +{"code":"H2020-EU.3.3.3.2.","title":"Reducing time to market for hydrogen and fuel cells technologies","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Reducing time to market for hydrogen and fuel cells technologies"} +{"code":"H2020-EU.5.d.","title":"Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based 
activities, namely in science centres and through other appropriate channels","classification_short":"Science with and for Society | Encourage citizens to engage in science through formal and informal science education, and promote the diffusion of science-based activities, namely in science centres and through other appropriate channels"} +{"code":"H2020-EU.3.1.","title":"SOCIETAL CHALLENGES - Health, demographic change and well-being","shortTitle":"Health","language":"en","classification":"Societal challenges | Health, demographic change and well-being","classification_short":"Societal Challenges | Health"} +{"code":"H2020-EU.3.5.3.1.","title":"Improve the knowledge base on the availability of raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve the knowledge base on the availability of raw materials"} +{"code":"H2020-EU.3.2.1.4.","title":"Sustainable forestry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Sustainable forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Sustainable forestry"} +{"code":"H2020-EU.3.3.","title":"SOCIETAL CHALLENGES - Secure, clean and efficient energy","shortTitle":"Energy","language":"en","classification":"Societal challenges | Secure, clean and efficient energy","classification_short":"Societal Challenges | Energy"} +{"code":"H2020-EU.3.4.8.1.","title":"Innovation Programme 1 (IP1): Cost-efficient and reliable trains","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 1 (IP1): Cost-efficient and reliable trains"} +{"code":"H2020-EU.2.3.2.1.","title":"Support for research intensive SMEs","shortTitle":"Support for research intensive SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Support for research intensive SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Support for research intensive SMEs"} +{"code":"H2020-EU.2.1.3.2.","title":"Materials development and transformation","shortTitle":"Materials development and transformation","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials development and transformation","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials development and transformation"} +{"code":"H2020-EU.1.4.1.3.","title":"Development, deployment and operation of ICT-based e-infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Development, deployment 
and operation of ICT-based e-infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Development, deployment and operation of ICT-based e-infrastructures"} +{"code":"H2020-EU.3.5.4.2.","title":"Support innovative policies and societal changes","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Support innovative policies and societal changes","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Support innovative policies and societal changes"} +{"code":"H2020-EU.2.1.3.6.","title":"Metrology, characterisation, standardisation and quality control","shortTitle":"Metrology, characterisation, standardisation and quality control","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Metrology, characterisation, standardisation and quality control","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Metrology, characterisation, standardisation and quality control"} +{"code":"H2020-EU.3.4.5.8.","title":"ECO Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ECO Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ECO Transverse Area"} +{"code":"H2020-EU.5.f.","title":"Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation","classification_short":"Science with and for Society | Develop the governance for the advancement of responsible research and innovation by all stakeholders, which is sensitive to society needs and demands and promote an ethics framework for research and innovation"} +{"code":"H2020-EU.5.h.","title":"Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public","classification_short":"Science with and for Society | Improving knowledge on science communication in order to improve the quality and effectiveness of interactions between scientists, general media and the public"} +{"code":"H2020-EU.2.1.1.7.1.","title":"Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | 
Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Design technologies, process and integration, equipment, materials and manufacturing for micro- and nanoelectronics while targeting miniaturisation, diversification and differentiation, heterogeneous integration"} +{"code":"H2020-EU.3.7.5.","title":"Increase Europe's resilience to crises and disasters","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Increase Europe's resilience to crises and disasters","classification_short":"Societal Challenges | Secure societies | Increase Europe's resilience to crises and disasters"} +{"code":"H2020-EU.1.4.2.2.","title":"Strengthening the human capital of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Strengthening the human capital of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Strengthening the human capital of research infrastructures"} +{"code":"H2020-EU.3.4.1.2.","title":"Developing smart equipment, infrastructures and services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Developing smart equipment, infrastructures and services"} +{"code":"H2020-EU.2.3.2.2.","title":"Enhancing the innovation capacity of SMEs","shortTitle":"Enhancing the innovation capacity of SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Enhancing the innovation capacity of SMEs","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Enhancing the innovation capacity of SMEs"} +{"code":"H2020-EU.1.3.5.","title":"Specific support and policy actions","shortTitle":"MSCA Specific support","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Specific support and policy actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Specific support"} +{"code":"H2020-EU.3.2.3.3.","title":"Boosting marine and maritime innovation through biotechnology","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Boosting marine and maritime innovation through biotechnology"} +{"code":"H2020-EU.3.2.1.2.","title":"Providing ecosystems services and public goods","shortTitle":"","language":"en","classification":"Societal 
challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Providing ecosystems services and public goods"} +{"code":"H2020-EU.2.3.2.3.","title":"Supporting market-driven innovation","shortTitle":"Supporting market-driven innovation","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support | Supporting market-driven innovation","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support | Supporting market-driven innovation"} +{"code":"H2020-EU.5.a.","title":"Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations","classification_short":"Science with and for Society | Make scientific and technological careers attractive to young students, and forster sustainable interaction between schools, research institutions, industry and civil society organisations"} +{"code":"H2020-EU.3.1.7.9.","title":"Ageing-associated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Ageing-associated diseases"} +{"code":"H2020-EU.2.2.1.","title":"The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","shortTitle":"Debt facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Debt facility providing debt finance for R&I: 'Union loan and guarantee service for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Debt facility"} +{"code":"H2020-Euratom-1.8.","title":"Ensure availability and use of research infrastructures of pan_european relevance","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance","classification_short":"Euratom | Indirect actions | Ensure availability and use of research infrastructures of pan_european relevance"} +{"code":"H2020-EU.3.2.2.1.","title":"Informed consumer choices","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Informed consumer choices"} +{"code":"H2020-EU.3.7.","title":"Secure societies - Protecting freedom and security of Europe and its citizens","shortTitle":"Secure 
societies","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens","classification_short":"Societal Challenges | Secure societies"} +{"code":"H2020-EU.1.3.4.","title":"Increasing structural impact by co-funding activities","shortTitle":"MSCA Co-funding","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Increasing structural impact by co-funding activities","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Co-funding"} +{"code":"H2020-EU.2.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies","shortTitle":"Leadership in enabling and industrial technologies (LEIT)","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT)"} +{"code":"H2020-EU.2.1.3.4.","title":"Materials for a sustainable, resource-efficient and low-emission industry","shortTitle":"Materials for a resource-efficient and low-emission industry","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Materials for a sustainable, resource-efficient and low-emission industry","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Materials for a resource-efficient and low-emission industry"} +{"code":"H2020-EU.3.4.5.7.","title":"Small Air Transport (SAT) Transverse Area","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Small Air Transport (SAT) Transverse Area"} +{"code":"H2020-EU.3.4.8.3.","title":"Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 3: Cost Efficient and Reliable High Capacity Infrastructure"} +{"code":"H2020-Euratom-1.1.","title":"Support safe operation of nuclear systems","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support safe operation of nuclear systems","classification_short":"Euratom | Indirect actions | Support safe operation of nuclear systems"} +{"code":"H2020-EU.2.3.1.","title":" Mainstreaming SME support, especially through a dedicated instrument","shortTitle":"Mainstreaming SME support","language":"en","classification":"Industrial leadership | Innovation In SMEs | Mainstreaming SME support, especially through a dedicated instrument","classification_short":"Industrial Leadership | Innovation in SMEs | Mainstreaming SME support"} +{"code":"H2020-EU.1.4.3.1.","title":"Reinforcing European policy for research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Reinforcing European policy for research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and 
international cooperation | Reinforcing European policy for research infrastructures"} +{"code":"H2020-Euratom-1.3.","title":"Support the development and sustainability of nuclear competences at Union level","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level","classification_short":"Euratom | Indirect actions | Support the development and sustainability of nuclear competences at Union level"} +{"code":"H2020-EU.3.1.7.1.","title":"Antimicrobial resistance","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Antimicrobial resistance"} +{"code":"H2020-EU.3.7.4.","title":"Improve cyber security","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Improve cyber security","classification_short":"Societal Challenges | Secure societies | Improve cyber security"} +{"code":"H2020-EU.2.1.1.7.2.","title":"Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL | Processes, methods, tools and platforms, reference designs and architectures, for software and/or control-intensive embedded/cyber-physical systems, addressing seamless connectivity and interoperability, functional safety, high availability, and security for professional and consumer type applications, and connected services"} +{"code":"H2020-EU.3.5.4.","title":"Enabling the transition towards a green economy and society through eco-innovation","shortTitle":"A green economy and society through eco-innovation","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation"} +{"code":"H2020-EU.3.5.3.2.","title":"Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Promote the 
sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Promote the sustainable supply and use of raw materials, including mineral resources, from land and sea, covering exploration, extraction, processing, re-use, recycling and recovery"} +{"code":"H2020-EU.3.4.5.10.","title":"Thematic Topics","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Thematic Topics","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Thematic Topics"} +{"code":"H2020-EU.3.1.5.1.","title":"Improving halth information and better use of health data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving halth information and better use of health data","classification_short":"Societal Challenges | Health | Methods and data | Improving halth information and better use of health data"} +{"code":"H2020-EU.3.3.3.1.","title":"Make bio-energy more competitive and sustainable","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable","classification_short":"Societal Challenges | Energy | Alternative fuels and mobile energy sources | Make bio-energy more competitive and sustainable"} +{"code":"H2020-EU.3.6.2.1.","title":"Strengthen the evidence base and support for the Innovation Union and ERA","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Strengthen the evidence base and support for the Innovation Union and ERA"} +{"code":"H2020-EU.3.1.7.12.","title":"Vaccine","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Vaccine","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Vaccine"} +{"code":"H2020-EU.3.5.4.3.","title":"Measure and assess progress towards a green economy","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Measure and assess progress towards a green economy","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Measure and assess progress towards a green economy"} +{"code":"H2020-EU.3.4.8.5.","title":"Innovation Programme 5: Technologies for sustainable and attractive European rail freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and attractive European rail freight","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 5: Technologies for sustainable and 
attractive European rail freight"} +{"code":"H2020-EU.3.5.4.4.","title":"Foster resource efficiency through digital systems","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Enabling the transition towards a green economy and society through eco-innovation | Foster resource efficiency through digital systems","classification_short":"Societal Challenges | Climate and environment | A green economy and society through eco-innovation | Foster resource efficiency through digital systems"} +{"code":"H2020-EU.3.3.8.3.","title":"Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Demonstrate on a large scale the feasibility of using hydrogen to support integration of renewable energy sources into the energy systems, including through its use as a competitive energy storage medium for electricity produced from renewable energy sources"} +{"code":"H2020-Euratom","title":"Euratom","shortTitle":"","language":"en","classification":"Euratom","classification_short":"Euratom"} +{"code":"H2020-EU.3.5.6.2.","title":"Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards","classification_short":"Societal Challenges | Climate and environment | Cultural heritage | Providing for a better understanding on how communities perceive and respond to climate change and seismic and volcanic hazards"} +{"code":"H2020-EU.3.2.5.2.","title":"Develop the potential of marine resources through an integrated approach","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Develop the potential of marine resources through an integrated approach"} +{"code":"H2020-EU.2.1.1.5.","title":"Advanced interfaces and robots: Robotics and smart spaces","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Advanced interfaces and robots: Robotics and smart spaces","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication 
Technologies | Advanced interfaces and robots: Robotics and smart spaces"} +{"code":"H2020-EU.3.3.5.","title":"New knowledge and technologies","shortTitle":"New knowledge and technologies","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | New knowledge and technologies","classification_short":"Societal Challenges | Energy | New knowledge and technologies"} +{"code":"H2020-EU.1.2.2.","title":"FET Proactive","shortTitle":"FET Proactive","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Proactive","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Proactive"} +{"code":"H2020-EU.3.6.1.3.","title":"Europe's role as a global actor, notably regarding human rights and global justice","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | Europe's role as a global actor, notably regarding human rights and global justice"} +{"code":"H2020-EU.2.1.4.1.","title":"Boosting cutting-edge biotechnologies as a future innovation driver","shortTitle":"Cutting-edge biotechnologies as future innovation driver","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Boosting cutting-edge biotechnologies as a future innovation driver","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Cutting-edge biotechnologies as future innovation driver"} +{"code":"H2020-EU.3.1.3.","title":"Treating and managing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease","classification_short":"Societal Challenges | Health | Treating and managing disease"} +{"code":"H2020-EU.3.3.4.","title":"A single, smart European electricity grid","shortTitle":"A single, smart European electricity grid","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | A single, smart European electricity grid","classification_short":"Societal Challenges | Energy | A single, smart European electricity grid"} +{"code":"H2020-EU.3.2.6.","title":"Bio-based Industries Joint Technology Initiative (BBI-JTI)","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI)"} +{"code":"H2020-EU.1.3.2.","title":"Nurturing excellence by means of cross-border and cross-sector mobility","shortTitle":"MSCA Mobility","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Nurturing excellence by means of cross-border and cross-sector mobility","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Mobility"} +{"code":"H2020-EU.2.1.3.7.","title":"Optimisation of the use of materials","shortTitle":"Optimisation of the use of 
materials","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Optimisation of the use of materials","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Optimisation of the use of materials"} +{"code":"H2020-EU.2.1.2.4.","title":"Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","shortTitle":"Synthesis and manufacturing of nanomaterials, components and systems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Efficient and sustainable synthesis and manufacturing of nanomaterials, components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Synthesis and manufacturing of nanomaterials, components and systems"} +{"code":"H2020-EU.1.4.1.","title":"Developing the European research infrastructures for 2020 and beyond","shortTitle":"Research infrastructures for 2020 and beyond","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond"} +{"code":"H2020-EU.3.1.1.1.","title":"Understanding the determinants of health, improving health promotion and disease prevention","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding the determinants of health, improving health promotion and disease prevention"} +{"code":"H2020-EU.5.c.","title":"Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology","classification_short":"Science with and for Society | Integrate society in science and innovation issues, policies and activities in order to integrate citizens' interests and values and to increase the quality, relevance, social acceptability and sustainability of research and innovation outcomes in various fields of activity from social innovation to areas such as biotechnology and nanotechnology"} +{"code":"H2020-EU.5.","title":"SCIENCE WITH AND FOR SOCIETY","shortTitle":"Science with and for Society","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY","classification_short":"Science with and for Society"} +{"code":"H2020-EU.3.5.3.3.","title":"Find alternatives for critical raw 
materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Find alternatives for critical raw materials"} +{"code":"H2020-EU.3.2.3.1.","title":"Developing sustainable and environmentally-friendly fisheries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing sustainable and environmentally-friendly fisheries"} +{"code":"H2020-EU.2.1.2.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Nanotechnologies","shortTitle":"Nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies"} +{"code":"H2020-EU.3.4.3.2.","title":"On board, smart control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | On board, smart control systems","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | On board, smart control systems"} +{"code":"H2020-EU.3.2.4.1.","title":"Fostering the bio-economy for bio-based industries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Fostering the bio-economy for bio-based industries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Fostering the bio-economy for bio-based industries"} +{"code":"H2020-EU.3.1.6.2.","title":"Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches","classification_short":"Societal Challenges | Health | Health care provision and integrated care | Optimising the efficiency and effectiveness of healthcare provision and reducing inequalities by evidence based decision making and dissemination of best practice, and innovative technologies and approaches"} +{"code":"H2020-EU.2.1.5.","title":"INDUSTRIAL LEADERSHIP - 
Leadership in enabling and industrial technologies - Advanced manufacturing and processing","shortTitle":"Advanced manufacturing and processing","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing"} +{"code":"H2020-EU.3.5.2.2.","title":"Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Developing integrated approaches to address water-related challenges and the transition to sustainable management and use of water resources and services"} +{"code":"H2020-EU.3.1.7.3.","title":"Cardiovascular diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cardiovascular diseases"} +{"code":"H2020-EU.3.3.8.2.","title":"Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market","classification_short":"Societal Challenges | Energy | FCH2 (energy objectives) | Increase the energy efficiency of production of hydrogen mainly from water electrolysis and renewable sources while reducing operating and capital costs, so that the combined system of the hydrogen production and the conversion using the fuel cell system can compete with the alternatives for electricity production available on the market"} +{"code":"H2020-EU.2.1.6.3.","title":"Enabling exploitation of space data","shortTitle":"Enabling exploitation of space data","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling exploitation of space data","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling exploitation of space data"} +{"code":"H2020-EU.2.1.2.5.","title":"Developing and standardisation of capacity-enhancing techniques, measuring methods and 
equipment","shortTitle":"Capacity-enhancing techniques, measuring methods and equipment","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing and standardisation of capacity-enhancing techniques, measuring methods and equipment","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Capacity-enhancing techniques, measuring methods and equipment"} +{"code":"H2020-EU.3.6.2.","title":"Innovative societies","shortTitle":"Innovative societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies"} +{"code":"H2020-EU.3.1.2.1.","title":"Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility","classification_short":"Societal Challenges | Health | Preventing disease | Developing effective prevention and screening programmes and improving the assessment of disease susceptibility"} +{"code":"H2020-EU.3.6.1.4.","title":"The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The promotion of sustainable and inclusive environments through innovative spatial and urban planning and design"} +{"code":"H2020-EU.3.3.2.4.","title":"Develop geothermal, hydro, marine and other renewable energy options","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop geothermal, hydro, marine and other renewable energy options"} +{"code":"H2020-EU.5.b.","title":"Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities","classification_short":"Science with and for Society | Promote gender equality in particular by supporting structural change in the organisation of research institutions and in the content and design of research activities"} +{"code":"H2020-EU.1.3.3.","title":"Stimulating innovation by means of cross-fertilisation of knowledge","shortTitle":"MSCA Knowledge","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions | Stimulating innovation by 
means of cross-fertilisation of knowledge","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MSCA Knowledge"} +{"code":"H2020-EU.3.1.4.2.","title":"Individual awareness and empowerment for self-management of health","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Individual awareness and empowerment for self-management of health"} +{"code":"H2020-EU.3.1.7.8.","title":"Immune-mediated diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Immune-mediated diseases"} +{"code":"H2020-EU.3.4.","title":"SOCIETAL CHALLENGES - Smart, Green And Integrated Transport","shortTitle":"Transport","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport","classification_short":"Societal Challenges | Transport"} +{"code":"H2020-EU.3.2.6.1.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable and competitive bio-based industries and supporting the development of a European bio-economy"} +{"code":"H2020-EU.2.1.2.1.","title":"Developing next generation nanomaterials, nanodevices and nanosystems ","shortTitle":"Next generation nanomaterials, nanodevices and nanosystems","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Developing next generation nanomaterials, nanodevices and nanosystems ","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Next generation nanomaterials, nanodevices and nanosystems"} +{"code":"H2020-Euratom-1.5.","title":"Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities","classification_short":"Euratom | Indirect actions | Move toward demonstration of feasibility of fusion as a power source by exploiting existing and future fusion facilities"} +{"code":"H2020-EU.3.5.","title":"SOCIETAL CHALLENGES - Climate action, Environment, Resource Efficiency and Raw Materials","shortTitle":"Climate and environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials","classification_short":"Societal Challenges | Climate and environment"} 
+{"code":"H2020-EU.2.1.1.6.","title":"Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Micro- and nanoelectronics and photonics: Key enabling technologies related to micro- and nanoelectronics and to photonics, covering also quantum technologies"} +{"code":"H2020-EU.3.4.2.4.","title":"Reducing accident rates, fatalities and casualties and improving security","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Reducing accident rates, fatalities and casualties and improving security","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Reducing accident rates, fatalities and casualties and improving security"} +{"code":"H2020-EU.3.6.2.2.","title":"Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Explore new forms of innovation, with special emphasis on social innovation and creativity and understanding how all forms of innovation are developed, succeed or fail"} +{"code":"H2020-EU.3.5.1.1.","title":"Improve the understanding of climate change and the provision of reliable climate projections","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Improve the understanding of climate change and the provision of reliable climate projections"} +{"code":"H2020-EU.3.4.3.4.","title":"Exploring entirely new transport concepts","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Exploring entirely new transport concepts"} +{"code":"H2020-EU.3.5.2.1.","title":"Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human 
well-being","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Further our understanding of biodiversity and the functioning of ecosystems, their interactions with social systems and their role in sustaining the economy and human well-being"} +{"code":"H2020-EU.3.2.2.3.","title":"A sustainable and competitive agri-food industry","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | A sustainable and competitive agri-food industry"} +{"code":"H2020-EU.1.4.1.1.","title":"Developing new world-class research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Developing new world-class research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Developing new world-class research infrastructures"} +{"code":"H2020-EU.3.1.2.3.","title":"Developing better preventive and therapeutic vaccines","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease | Developing better preventive and therapeutic vaccines","classification_short":"Societal Challenges | Health | Preventing disease | Developing better preventive and therapeutic vaccines"} +{"code":"H2020-EU.1.4.3.2.","title":"Facilitate strategic international cooperation","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Reinforcing European research infrastructure policy and international cooperation | Facilitate strategic international cooperation","classification_short":"Excellent Science | Research Infrastructures | Research infrastructure policy and international cooperation | Facilitate strategic international cooperation"} +{"code":"H2020-EU.3.5.2.","title":"Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","shortTitle":"Protection of the environment","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems","classification_short":"Societal Challenges | Climate and environment | Protection of the environment"} +{"code":"H2020-Euratom-1.9.","title":"European Fusion Development Agreement","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | European Fusion Development Agreement","classification_short":"Euratom | Indirect actions | 
European Fusion Development Agreement"} +{"code":"H2020-EU.3.2.1.1.","title":"Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Increasing production efficiency and coping with climate change, while ensuring sustainability and resilience"} +{"code":"H2020-EU.3.2.2.2.","title":"Healthy and safe foods and diets for all","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet | Healthy and safe foods and diets for all"} +{"code":"H2020-EU.2.1.4.2.","title":"Bio-technology based industrial products and processes","shortTitle":"Bio-technology based industrial products and processes","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Bio-technology based industrial products and processes","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Bio-technology based industrial products and processes"} +{"code":"H2020-EU.3.4.5.1.","title":"IADP Large Passenger Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Large Passenger Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Large Passenger Aircraft"} +{"code":"H2020-EU.3.1.1.3.","title":"Improving surveillance and preparedness","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Improving surveillance and preparedness","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Improving surveillance and preparedness"} +{"code":"H2020-EU.2.1.6.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Space","shortTitle":"Space","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space"} +{"code":"H2020-EU.3.1.5.2.","title":"Improving scientific tools and methods to support policy making and regulatory needs","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Improving scientific tools and methods to support policy making and regulatory needs","classification_short":"Societal Challenges | Health | Methods and data | Improving scientific tools and methods to support policy making and regulatory 
needs"} +{"code":"H2020-EU.3.","title":"Societal challenges","shortTitle":"Societal Challenges","language":"en","classification":"Societal challenges","classification_short":"Societal Challenges"} +{"code":"H2020-EU.1.3.","title":"EXCELLENT SCIENCE - Marie Skłodowska-Curie Actions","shortTitle":"Marie-Sklodowska-Curie Actions","language":"en","classification":"Excellent science | Marie Skłodowska-Curie Actions","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions"} +{"code":"H2020-EU.4.f.","title":"Strengthening the administrative and operational capacity of transnational networks of National Contact Points","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Strengthening the administrative and operational capacity of transnational networks of National Contact Points","classification_short":"Spreading excellence and widening participation | Strengthening the administrative and operational capacity of transnational networks of National Contact Points"} +{"code":"H2020-EU.1.2.","title":"EXCELLENT SCIENCE - Future and Emerging Technologies (FET)","shortTitle":"Future and Emerging Technologies (FET)","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET)","classification_short":"Excellent Science | Future and Emerging Technologies (FET)"} +{"code":"H2020-EU.3.3.1.1.","title":"Bring to mass market technologies and services for a smart and efficient energy use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Bring to mass market technologies and services for a smart and efficient energy use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Bring to mass market technologies and services for a smart and efficient energy use"} +{"code":"H2020-EU.3.3.2.2.","title":"Develop efficient, reliable and cost-competitive solar energy systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop efficient, reliable and cost-competitive solar energy systems"} +{"code":"H2020-EU.4.c.","title":"Establishing ‚ERA Chairs’","shortTitle":"ERA chairs","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Establishing ‚ERA Chairs’","classification_short":"Spreading excellence and widening participation | ERA chairs"} +{"code":"H2020-EU.3.4.5","title":"CLEANSKY2","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2","classification_short":"Societal Challenges | Transport | CLEANSKY2"} +{"code":"H2020-EU.3.4.5.2.","title":"IADP Regional Aircraft","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | IADP Regional Aircraft","classification_short":"Societal Challenges | Transport | CLEANSKY2 | IADP Regional Aircraft"} +{"code":"H2020-EU.3.5.1.","title":"Fighting and adapting to climate change","shortTitle":"Fighting and adapting to climate change","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to 
climate change","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change"} +{"code":"H2020-EU.3.3.1.","title":"Reducing energy consumption and carbon foorpint by smart and sustainable use","shortTitle":"Reducing energy consumption and carbon footprint","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint"} +{"code":"H2020-EU.3.4.1.","title":"Resource efficient transport that respects the environment","shortTitle":"Resource efficient transport that respects the environment","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment"} +{"code":"H2020-EU.3.2.6.2.","title":"Fostering the bio-economy for bio-based industrie","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Fostering the bio-economy for bio-based industrie"} +{"code":"H2020-EU.3.4.7.1","title":"Exploratory Research","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | SESAR JU | Exploratory Research","classification_short":"Societal Challenges | Transport | SESAR JU | Exploratory Research"} +{"code":"H2020-EU.1.2.1.","title":"FET Open","shortTitle":"FET Open","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Open","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Open"} +{"code":"H2020-EU.3.4.3.1.","title":"Developing the next generation of transport means as the way to secure market share in the future","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry | Developing the next generation of transport means as the way to secure market share in the future"} +{"code":"H2020-EU.3.2.4.","title":"Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","shortTitle":"Bio-based industries and supporting bio-economy","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy"} +{"code":"H2020-EC","title":"Horizon 2020 Framework Programme","shortTitle":"EC 
Treaty","language":"en","classification":"Horizon 2020 Framework Programme","classification_short":"EC Treaty"} +{"code":"H2020-EU.3.6.2.4.","title":"Promote coherent and effective cooperation with third countries","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Promote coherent and effective cooperation with third countries","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Promote coherent and effective cooperation with third countries"} +{"code":"H2020-EU.3.1.7.5.","title":"Neurodegenerative diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Neurodegenerative diseases"} +{"code":"H2020-EU.2.1.6.4.","title":"Enabling European research in support of international space partnerships","shortTitle":"Research in support of international space partnerships","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European research in support of international space partnerships","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Research in support of international space partnerships"} +{"code":"H2020-EU.2.1.5.1.","title":"Technologies for Factories of the Future","shortTitle":"Technologies for Factories of the Future","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies for Factories of the Future","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies for Factories of the Future"} +{"code":"H2020-EU.2.3.2.","title":"Specific support","shortTitle":"","language":"en","classification":"Industrial leadership | Innovation In SMEs | Specific support","classification_short":"Industrial Leadership | Innovation in SMEs | Specific support"} +{"code":"H2020-EU.1.4.2.","title":"Fostering the innovation potential of research infrastructures and their human resources","shortTitle":"Research infrastructures and their human resources","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources"} +{"code":"H2020-EU.3.3.1.2.","title":"Unlock the potential of efficient and renewable heating-cooling systems","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Unlock the potential of efficient and renewable heating-cooling systems","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Unlock the potential of efficient and renewable heating-cooling systems"} +{"code":"H2020-EU.3.2.3.2.","title":"Developing competitive and environmentally-friendly European aquaculture","shortTitle":"","language":"en","classification":"Societal 
challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources | Developing competitive and environmentally-friendly European aquaculture"} +{"code":"H2020-EU.3.2.1.3.","title":"Empowerment of rural areas, support to policies and rural innovation","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry | Empowerment of rural areas, support to policies and rural innovation"} +{"code":"H2020-EU.3.2.5.3.","title":"Cross-cutting concepts and technologies enabling maritime growth","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Cross-cutting concepts and technologies enabling maritime growth"} +{"code":"H2020-EU.2.1.3.1.","title":"Cross-cutting and enabling materials technologies","shortTitle":"Cross-cutting and enabling materials technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced materials | Cross-cutting and enabling materials technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced materials | Cross-cutting and enabling materials technologies"} +{"code":"H2020-EU.3.1.1.2.","title":"Understanding disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease | Understanding disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease | Understanding disease"} +{"code":"H2020-Euratom-1.6.","title":"Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design","classification_short":"Euratom | Indirect actions | Lay the foundations for future fusion power plants by developing materials, technologies and conceptual design"} +{"code":"H2020-EU.3.5.7.1.","title":"Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective) | Reduce the 
use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective) | Reduce the use of the EU defined \"Critical raw materials\", for instance through low platinum or platinum free resources and through recycling or reducing or avoiding the use of rare earth elements"} +{"code":"H2020-EU.2.2.","title":"INDUSTRIAL LEADERSHIP - Access to risk finance","shortTitle":"Access to risk finance","language":"en","classification":"Industrial leadership | Access to risk finance","classification_short":"Industrial Leadership | Access to risk finance"} +{"code":"H2020-EU.3.4.6.","title":"FCH2 (transport objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | FCH2 (transport objectives)","classification_short":"Societal Challenges | Transport | FCH2 (transport objectives)"} +{"code":"H2020-EU.4.d.","title":"A Policy Support Facility","shortTitle":"Policy Support Facility (PSF)","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | A Policy Support Facility","classification_short":"Spreading excellence and widening participation | Policy Support Facility (PSF)"} +{"code":"H2020-EU.2.1.1.7.","title":"ECSEL","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | ECSEL","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | ECSEL"} +{"code":"H2020-EU.3.1.5.","title":"Methods and data","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data","classification_short":"Societal Challenges | Health | Methods and data"} +{"code":"H2020-EU.3.7.7.","title":"Enhance stadardisation and interoperability of systems, including for emergency purposes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Enhance stadardisation and interoperability of systems, including for emergency purposes","classification_short":"Societal Challenges | Secure societies | Enhance stadardisation and interoperability of systems, including for emergency purposes"} +{"code":"H2020-Euratom-1.7.","title":"Promote innovation and industry competitiveness","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Promote innovation and industry competitiveness","classification_short":"Euratom | Indirect actions | Promote innovation and industry competitiveness"} +{"code":"H2020-EU.2.1.5.3.","title":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","shortTitle":"Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Sustainable, resource-efficient and low-carbon technologies in energy-intensive process industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Sustainable, 
resource-efficient and low-carbon technologies in energy-intensive process industries"} +{"code":"H2020-EU.2.1.4.3.","title":"Innovative and competitive platform technologies","shortTitle":"Innovative and competitive platform technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology | Innovative and competitive platform technologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology | Innovative and competitive platform technologies"} +{"code":"H2020-EU.1.2.3.","title":"FET Flagships","shortTitle":"FET Flagships","language":"en","classification":"Excellent science | Future and Emerging Technologies (FET) | FET Flagships","classification_short":"Excellent Science | Future and Emerging Technologies (FET) | FET Flagships"} +{"code":"H2020-EU.3.6.3.","title":"Reflective societies - cultural heritage and European identity","shortTitle":"Reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies"} +{"code":"H2020-EU.3.6.3.3.","title":"Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Research on Europe's role in the world, on the mutual influence and ties between the world regions, and a view from outside on European cultures"} +{"code":"H2020-EU.3.2.4.2.","title":"Developing integrated biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive bio-based industries and supporting the development of a European bioeconomy | Developing integrated biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based industries and supporting bio-economy | Developing integrated biorefineries"} +{"code":"H2020-EU.2.1.6.1.1.","title":"Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling European competitiveness, non-dependence and innovation of the European space sector | Safeguard and further develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Competitiveness, non-dependence and innovation | Safeguard and further 
develop a competitive, sustainable and entrepreneurial space industry and research community and strengthen European non-dependence in space systems"} +{"code":"H2020-EU.3.1.3.2.","title":"Transferring knowledge to clinical practice and scalable innovation actions","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions","classification_short":"Societal Challenges | Health | Treating and managing disease | Transferring knowledge to clinical practice and scalable innovation actions"} +{"code":"H2020-EU.2.","title":"Industrial leadership","shortTitle":"Industrial Leadership","language":"en","classification":"Industrial leadership","classification_short":"Industrial Leadership"} +{"code":"H2020-EU.3.4.1.3.","title":"Improving transport and mobility in urban areas","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas","classification_short":"Societal Challenges | Transport | Resource efficient transport that respects the environment | Improving transport and mobility in urban areas"} +{"code":"H2020-EU.4.e.","title":"Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","shortTitle":"","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks","classification_short":"Spreading excellence and widening participation | Supporting access to international networks for excellent researchers and innovators who lack sufficient involvement in European and international networks"} +{"code":"H2020-EU.3.2.1.","title":"Sustainable agriculture and forestry","shortTitle":"Sustainable agriculture and forestry","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable agriculture and forestry","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable agriculture and forestry"} +{"code":"H2020-EU.3.1.7.7.","title":"Respiratory diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Respiratory diseases"} +{"code":"H2020-EU.3.4.8.6.","title":"Cross-cutting themes and activities (CCA)","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Cross-cutting themes and activities (CCA)"} +{"code":"H2020-EU.3.4.8.4.","title":"Innovation Programme 4: IT Solutions for attractive railway services","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway 
services","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 4: IT Solutions for attractive railway services"} +{"code":"H2020-EU.3.2.2.","title":"Sustainable and competitive agri-food sector for a safe and healthy diet","shortTitle":"Sustainable and competitive agri-food sector for a safe and healthy diet","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Sustainable and competitive agri-food sector for a safe and healthy diet"} +{"code":"H2020-EU.3.4.3.","title":"Global leadership for the European transport industry","shortTitle":"Global leadership for the European transport industry","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Global leadership for the European transport industry","classification_short":"Societal Challenges | Transport | Global leadership for the European transport industry"} +{"code":"H2020-EU.1.4.2.1.","title":"Exploiting the innovation potential of research infrastructures","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Fostering the innovation potential of research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures and their human resources | Exploiting the innovation potential of research infrastructures"} +{"code":"H2020-EU.3.3.2.3.","title":"Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply | Develop competitive and environmentally safe technologies for CO2 capture, transport, storage and re-use"} +{"code":"H2020-EU.3.6.3.1.","title":"Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Reflective societies - cultural heritage and European identity | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Reflective societies | Study European heritage, memory, identity, integration and cultural interaction and translation, including its representations in cultural and scientific collections, archives and museums, to better inform and understand the present by richer interpretations of the past"} 
+{"code":"H2020-EU.2.1.2.2.","title":"Ensuring the safe and sustainable development and application of nanotechnologies","shortTitle":"Safe and sustainable nanotechnologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Nanotechnologies | Ensuring the safe and sustainable development and application of nanotechnologies","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Nanotechnologies | Safe and sustainable nanotechnologies"} +{"code":"H2020-EU.3.1.6.","title":"Health care provision and integrated care","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Health care provision and integrated care","classification_short":"Societal Challenges | Health | Health care provision and integrated care"} +{"code":"H2020-EU.3.4.5.9.","title":"Technology Evaluator","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | Technology Evaluator","classification_short":"Societal Challenges | Transport | CLEANSKY2 | Technology Evaluator"} +{"code":"H2020-EU.3.6.","title":"SOCIETAL CHALLENGES - Europe In A Changing World - Inclusive, Innovative And Reflective Societies","shortTitle":"Inclusive, innovative and reflective societies","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies"} +{"code":"H2020-EU.3.4.8.","title":"Shift2Rail JU","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU","classification_short":"Societal Challenges | Transport | Shift2Rail JU"} +{"code":"H2020-EU.3.2.6.3.","title":"Sustainable biorefineries","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Bio-based Industries Joint Technology Initiative (BBI-JTI) | Sustainable biorefineries"} +{"code":"H2020-EU.4.a.","title":"Teaming of excellent research institutions and low performing RDI regions","shortTitle":"Teaming of research institutions and low performing regions","language":"en","classification":"SPREADING EXCELLENCE AND WIDENING PARTICIPATION | Teaming of excellent research institutions and low performing RDI regions","classification_short":"Spreading excellence and widening participation | Teaming of research institutions and low performing regions"} +{"code":"H2020-EU.3.1.7.4.","title":"Diabetes","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Diabetes","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Diabetes"} +{"code":"H2020-EU.3.7.2.","title":"Protect and improve the resilience of critical infrastructures, supply chains and tranport modes","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Protect and improve the resilience of critical 
infrastructures, supply chains and tranport modes","classification_short":"Societal Challenges | Secure societies | Protect and improve the resilience of critical infrastructures, supply chains and tranport modes"} +{"code":"H2020-EU.3.1.2.","title":"Preventing disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Preventing disease","classification_short":"Societal Challenges | Health | Preventing disease"} +{"code":"H2020-EU.3.5.3.4.","title":"Improve societal awareness and skills on raw materials","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials | Improve societal awareness and skills on raw materials"} +{"code":"H2020-EU.3.3.7.","title":"Market uptake of energy innovation - building on Intelligent Energy Europe","shortTitle":"Market uptake of energy innovation","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Market uptake of energy innovation - building on Intelligent Energy Europe","classification_short":"Societal Challenges | Energy | Market uptake of energy innovation"} +{"code":"H2020-EU.2.3.","title":"INDUSTRIAL LEADERSHIP - Innovation In SMEs","shortTitle":"Innovation in SMEs","language":"en","classification":"Industrial leadership | Innovation In SMEs","classification_short":"Industrial Leadership | Innovation in SMEs"} +{"code":"H2020-EU.2.1.1.3.","title":"Future Internet: Software, hardware, Infrastructures, technologies and services","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Future Internet: Software, hardware, Infrastructures, technologies and services","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Future Internet: Software, hardware, Infrastructures, technologies and services"} +{"code":"H2020-EU.3.1.5.3.","title":"Using in-silico medicine for improving disease management and prediction","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Methods and data | Using in-silico medicine for improving disease management and prediction","classification_short":"Societal Challenges | Health | Methods and data | Using in-silico medicine for improving disease management and prediction"} +{"code":"H2020-EU.3.6.1.1.","title":"The mechanisms to promote smart, sustainable and inclusive growth","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Inclusive societies | The mechanisms to promote smart, sustainable and inclusive growth"} +{"code":"H2020-EU.1.3.1.","title":"Fostering new skills by means of excellent initial training of researchers","shortTitle":"MCSA Initial training","language":"en","classification":"Excellent science | Marie 
Skłodowska-Curie Actions | Fostering new skills by means of excellent initial training of researchers","classification_short":"Excellent Science | Marie-Sklodowska-Curie Actions | MCSA Initial training"} +{"code":"H2020-EU.3.6.2.3.","title":"Make use of the innovative, creative and productive potential of all generations","shortTitle":"","language":"en","classification":"Societal challenges | Europe In A Changing World - Inclusive, Innovative And Reflective Societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations","classification_short":"Societal Challenges | Inclusive, innovative and reflective societies | Innovative societies | Make use of the innovative, creative and productive potential of all generations"} +{"code":"H2020-EU.3.5.1.3.","title":"Support mitigation policies, including studies that focus on impact from other sectoral policies","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies","classification_short":"Societal Challenges | Climate and environment | Fighting and adapting to climate change | Support mitigation policies, including studies that focus on impact from other sectoral policies"} +{"code":"H2020-EU.3.3.1.3.","title":"Foster European Smart cities and Communities","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Reducing energy consumption and carbon foorpint by smart and sustainable use | Foster European Smart cities and Communities","classification_short":"Societal Challenges | Energy | Reducing energy consumption and carbon footprint | Foster European Smart cities and Communities"} +{"code":"H2020-EU.3.1.1.","title":"Understanding health, wellbeing and disease","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Understanding health, wellbeing and disease","classification_short":"Societal Challenges | Health | Understanding health, wellbeing and disease"} +{"code":"H2020-Euratom-1.","title":"Indirect actions","shortTitle":"","language":"en","classification":"Euratom | Indirect actions","classification_short":"Euratom | Indirect actions"} +{"code":"H2020-EU.3.5.7.","title":"FCH2 (raw materials objective)","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | FCH2 (raw materials objective)","classification_short":"Societal Challenges | Climate and environment | FCH2 (raw materials objective)"} +{"code":"H2020-EU.3.7.3.","title":"Strengthen security through border management","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Strengthen security through border management","classification_short":"Societal Challenges | Secure societies | Strengthen security through border management"} +{"code":"H2020-EU.2.1.1.2.","title":"Next generation computing: Advanced and secure computing systems and technologies, including cloud computing","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Next generation computing: Advanced and secure computing systems and 
technologies, including cloud computing","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Next generation computing: Advanced and secure computing systems and technologies, including cloud computing"} +{"code":"H2020-EU.3.5.5.","title":"Developing comprehensive and sustained global environmental observation and information systems","shortTitle":"Environmental observation and information systems","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Developing comprehensive and sustained global environmental observation and information systems","classification_short":"Societal Challenges | Climate and environment | Environmental observation and information systems"} +{"code":"H2020-EU.3.1.7.10.","title":"Cancer","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Cancer","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Cancer"} +{"code":"H2020-EU.3.4.8.2.","title":"Innovation Programme 2: Advanced traffic management and control systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems","classification_short":"Societal Challenges | Transport | Shift2Rail JU | Innovation Programme 2: Advanced traffic management and control systems"} +{"code":"H2020-EU.5.e.","title":"Develop the accessibility and the use of the results of publicly-funded research","shortTitle":"","language":"en","classification":"SCIENCE WITH AND FOR SOCIETY | Develop the accessibility and the use of the results of publicly-funded research","classification_short":"Science with and for Society | Develop the accessibility and the use of the results of publicly-funded research"} +{"code":"H2020-EU.3.4.4.","title":"Socio-economic and behavioural research and forward looking activities for policy making","shortTitle":"Socio-economic and behavioural research","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Socio-economic and behavioural research and forward looking activities for policy making","classification_short":"Societal Challenges | Transport | Socio-economic and behavioural research"} +{"code":"H2020-EU.3.3.2.","title":"Low-cost, low-carbon energy supply","shortTitle":"Low-cost, low-carbon energy supply","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Low-cost, low-carbon energy supply","classification_short":"Societal Challenges | Energy | Low-cost, low-carbon energy supply"} +{"code":"H2020-EU.3.4.2.2.","title":"Substantial improvements in the mobility of people and freight","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | Better mobility, less congestion, more safety and security | Substantial improvements in the mobility of people and freight","classification_short":"Societal Challenges | Transport | Mobility, safety and security | Substantial improvements in the mobility of people and freight"} +{"code":"H2020-EU.3.5.6.","title":"Cultural heritage","shortTitle":"Cultural heritage","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw 
Materials | Cultural heritage","classification_short":"Societal Challenges | Climate and environment | Cultural heritage"} +{"code":"H2020-EU.3.5.3.","title":"Ensuring the sustainable supply of non-energy and non-agricultural raw materials","shortTitle":"Supply of non-energy and non-agricultural raw materials","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Ensuring the sustainable supply of non-energy and non-agricultural raw materials","classification_short":"Societal Challenges | Climate and environment | Supply of non-energy and non-agricultural raw materials"} +{"code":"H2020-EU.2.1.5.2.","title":"Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","shortTitle":"Technologies enabling energy-efficient systems and buildings","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and energy-efficient buildings with a low environmental impact","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | Technologies enabling energy-efficient systems and buildings"} +{"code":"H2020-EU.1.4.1.2.","title":"Integrating and opening existing national and regional research infrastructures of European interest","shortTitle":"","language":"en","classification":"Excellent science | Research Infrastructures | Developing the European research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest","classification_short":"Excellent Science | Research Infrastructures | Research infrastructures for 2020 and beyond | Integrating and opening existing national and regional research infrastructures of European interest"} +{"code":"H2020-EU.3.7.8.","title":"Support the Union's external security policies including through conflict prevention and peace-building","shortTitle":"","language":"en","classification":"Societal challenges | Secure societies - Protecting freedom and security of Europe and its citizens | Support the Union's external security policies including through conflict prevention and peace-building","classification_short":"Societal Challenges | Secure societies | Support the Union's external security policies including through conflict prevention and peace-building"} +{"code":"H2020-EU.2.1.1.1.","title":"A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | A new generation of components and systems: Engineering of advanced embedded and energy and resource efficient components and systems"} +{"code":"H2020-EU.1.1.","title":"EXCELLENT SCIENCE - European Research Council (ERC)","shortTitle":"European Research Council (ERC)","language":"en","classification":"Excellent science | European Research Council 
(ERC)","classification_short":"Excellent Science | European Research Council (ERC)"} +{"code":"H2020-EU.3.4.5.6.","title":"ITD Systems","shortTitle":"","language":"en","classification":"Societal challenges | Smart, Green And Integrated Transport | CLEANSKY2 | ITD Systems","classification_short":"Societal Challenges | Transport | CLEANSKY2 | ITD Systems"} +{"code":"H2020-EU.6.","title":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","shortTitle":"Joint Research Centre (JRC) non-nuclear direct actions","language":"en","classification":"NON-NUCLEAR DIRECT ACTIONS OF THE JOINT RESEARCH CENTRE (JRC)","classification_short":"Joint Research Centre (JRC) non-nuclear direct actions"} +{"code":"H2020-EU.3.2.5.1.","title":"Climate change impact on marine ecosystems and maritime economy","shortTitle":"","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Cross-cutting marine and maritime research | Climate change impact on marine ecosystems and maritime economy"} +{"code":"H2020-Euratom-1.2.","title":"Contribute to the development of solutions for the management of ultimate nuclear waste","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste","classification_short":"Euratom | Indirect actions | Contribute to the development of solutions for the management of ultimate nuclear waste"} +{"code":"H2020-EU.3.1.7.11.","title":"Rare/Orphan Diseases","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Rare/Orphan Diseases"} +{"code":"H2020-EU.3.1.4.1.","title":"Active ageing, independent and assisted living","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Active ageing and self-management of health | Active ageing, independent and assisted living","classification_short":"Societal Challenges | Health | Active ageing and self-management of health | Active ageing, independent and assisted living"} +{"code":"H2020-Euratom-1.4.","title":"Foster radiation protection","shortTitle":"","language":"en","classification":"Euratom | Indirect actions | Foster radiation protection","classification_short":"Euratom | Indirect actions | Foster radiation protection"} +{"code":"H2020-EU.2.2.2.","title":"The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","shortTitle":"Equity facility","language":"en","classification":"Industrial leadership | Access to risk finance | The Equity facility providing equity finance for R&I: 'Union equity instruments for research and innovation'","classification_short":"Industrial Leadership | Access to risk finance | Equity facility"} +{"code":"H2020-EU.3.3.8.","title":"FCH2 (energy objectives)","shortTitle":"","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | FCH2 (energy objectives)","classification_short":"Societal Challenges | Energy | FCH2 (energy 
objectives)"} +{"code":"H2020-EU.3.2.3.","title":"Unlocking the potential of aquatic living resources","shortTitle":"Potential of aquatic living resources","language":"en","classification":"Societal challenges | Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy | Unlocking the potential of aquatic living resources","classification_short":"Societal Challenges | Food, agriculture, forestry, marine research and bioeconomy | Potential of aquatic living resources"} +{"code":"H2020-EU.3.5.2.3.","title":"Provide knowledge and tools for effective decision making and public engagement","shortTitle":"","language":"en","classification":"Societal challenges | Climate action, Environment, Resource Efficiency and Raw Materials | Protection of the environment, sustainable management of natural resources, water, biodiversity and ecosystems | Provide knowledge and tools for effective decision making and public engagement","classification_short":"Societal Challenges | Climate and environment | Protection of the environment | Provide knowledge and tools for effective decision making and public engagement"} +{"code":"H2020-EU.3.3.6.","title":"Robust decision making and public engagement","shortTitle":"Robust decision making and public engagement","language":"en","classification":"Societal challenges | Secure, clean and efficient energy | Robust decision making and public engagement","classification_short":"Societal Challenges | Energy | Robust decision making and public engagement"} +{"code":"H2020-EU.3.1.7.2.","title":"Osteoarthritis","shortTitle":"","language":"en","classification":"Societal challenges | Health, demographic change and well-being | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis","classification_short":"Societal Challenges | Health | Innovative Medicines Initiative 2 (IMI2) | Osteoarthritis"} +{"code":"H2020-EU.2.1.1.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies - Information and Communication Technologies (ICT)","shortTitle":"Information and Communication Technologies","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT)","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies"} +{"code":"H2020-EU.2.1.6.2.","title":"Enabling advances in space technology","shortTitle":"Enabling advances in space technology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Space | Enabling advances in space technology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Space | Enabling advances in space technology"} +{"code":"H2020-EU.2.1.1.4.","title":"Content technologies and information management: ICT for digital content, cultural and creative industries","shortTitle":"","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Information and Communication Technologies (ICT) | Content technologies and information management: ICT for digital content, cultural and creative industries","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Information and Communication Technologies | Content technologies and information management: ICT for digital content, cultural and 
creative industries"} +{"code":"H2020-EU.2.1.5.4.","title":"New sustainable business models","shortTitle":"New sustainable business models","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Advanced manufacturing and processing | New sustainable business models","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Advanced manufacturing and processing | New sustainable business models"} +{"code":"H2020-EU.2.1.4.","title":"INDUSTRIAL LEADERSHIP - Leadership in enabling and industrial technologies – Biotechnology","shortTitle":"Biotechnology","language":"en","classification":"Industrial leadership | Leadership in enabling and industrial technologies | Biotechnology","classification_short":"Industrial Leadership | Leadership in enabling and industrial technologies (LEIT) | Biotechnology"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json index 058ce8877..ca2c0f165 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/prepared_projects.json @@ -1,17 +1,17 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. \nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. 
The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. 
With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. 
When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. 
The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":null,"id":"886776","acronym":null,"status":null,"programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. 
\nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. 
By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. 
They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. 
These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. 
The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. 
From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"230066","id":"883730","acronym":"SOLSPACE","status":"SIGNED","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG","frameworkProgramme":"H2020","title":"Enhancing Global Clean Energy Services Using Orbiting Solar Reflectors", "startDate":"2021-03-01","endDate":"2025-11-30","projectUrl":"","objective":"fake", "totalCost":"2496392","ecMaxContribution":"2496392","call":"ERC-2019-ADG","fundingScheme":"ERC-ADG","coordinator":"UNIVERSITY OF GLASGOW","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D4"} +{"id":"886776","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.","topics":"BBI-2019-SO3-D3"} +{"id":"887259","programme":"H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"883730","programme":"H2020-EU.1.1.","topics":"ERC-2019-ADG"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json index edf83fbc8..ae1b7cb82 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/projects_subset.json @@ -1,16 +1,16 @@ -{"rcn":"229267","id":"894593","acronym":"ICARUS","status":"SIGNED","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019","frameworkProgramme":"H2020","title":"INTEGRATED COMMON ALTITUDE REFERENCE SYSTEM FOR U-SPACE","startDate":"2020-05-01","endDate":"2022-07-31","projectUrl":"","objective":"ICARUS project proposes an innovative solution to the challenge of the Common Altitude Reference inside VLL airspaces with the definition of a new U-space service and its validation in a real operational environment. In manned aviation, the methods of determining the altitude of an aircraft are based on pressure altitude difference measurements (e.g. QFE, QNH and FL) referred to a common datum. 
\nThe UA flights superimpose a new challenge, since a small drone may take off and land almost from everywhere, hence reducing the original significance of QFE settings, introduced on behalf of manned pilots to display on the altimeter the 0-height at touchdown on the local runway. In fact, the possibility for n drones to take off at n different places would generate a series of n different QFE corresponding to different heights of ground pressures referred to the take-off “Home points”. Therefore for a large number drones, new methodologies and procedures shall be put in place. The ICARUS defines a new U-space U3 service tightly coupled with the interface of the existing U-space services (e.g. Tracking, and Flight Planning services). The users of ICARUS service shall be remote pilots competent to fly in BVLOS in the specific category of UAS operations and ultralight GA pilots potentially sharing the same VLL airspace. \nThe ICARUS proposed approach foresees the realization of DTM service embedded in an Application Program Interface (API) that can be queried by UAS pilot/operator (or by drone itself) based on the actual positioning of the UA along its trajectory, computed by the (E)GNSS receiver. The output of the DTM service would provide information on distance from ground/obstacles in combination with the common altitude reference.\nAccuracy, continuity, integrity and availability requirements for GNSS-based altimetry together with accuracy and resolution requirements of the DTM to be provided by ICARUS service are key topics of the study.","totalCost":"1385286,25","ecMaxContribution":"1144587,5","call":"H2020-SESAR-2019-2","fundingScheme":"SESAR-RIA","coordinator":"E-GEOS SPA","coordinatorCountry":"IT","participants":"TOPVIEW SRL;TELESPAZIO SPA;DRONERADAR SP Z O.O.;EUROCONTROL - EUROPEAN ORGANISATION FOR THE SAFETY OF AIR NAVIGATION;EUROUSC ESPANA SL;POLITECNICO DI MILANO;UNIVERSITA DEGLI STUDI DI ROMA LA SAPIENZA","participantCountries":"IT;PL;BE;ES","subjects":""} -{"rcn":"229284","id":"897004","acronym":"ISLand","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Isolation and Segregation Landscape. Archaeology of quarantine in the Indian Ocean World","startDate":"2020-11-01","endDate":"2023-10-31","projectUrl":"","objective":"The proposed research presents an experimental and completely novel investigation within the historical archaeology,\napplied to isolated contexts. The main objective of ISLand is to provide a new way of thinking about human interactions\nwithin colonial empires and bringing colonial studies into dialogue with medical history and the emerging concept of\nhealthscaping. It seeks to do so by studying quarantine facilities in the Indian Ocean World during the long nineteenth\ncentury, a crucial period for the history of European empires in that region and a flashpoint for the conceptualization of\nmodern public health. Quarantine, traditionally viewed as merely a mechanism for the control of disease, will be analyzed as\nthe outward material response to important changes taking place socially, ecologically, and politically at the time.\nThe project is a part of an international, interdisciplinary effort, combining history, archaeology, and anthropology. 
The\nresearcher will tap numerous archival sources and archaeological data from selected sites, examine them through social and\nspatial analysis, and systematically analyze a test case in Mauritius through the most innovative methods that target\nlandscape and standing archaeology.\nThe broader impacts of ISLand have relevance for current European approaches to the migration crisis, where the threat of\ndisease has been ignited as a potentially debilitating consequence of immigration from extra-European countries. The\ntraining-through-research project at the Stanford University, the top institution where acquiring knowledge and skills in\nhistorical archaeology, will allow the applicant to develop into a position of professional maturity with a specific\ninterdisciplinary set of skills. With the support of the host institutions in EU, the researcher will promote historical archaeology\nin European academy, stimulating new approaches in usual archaeological research and an interdisciplinary approach with\ncultural anthropology.","totalCost":"253052,16","ecMaxContribution":"253052,16","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-GF","coordinator":"UNIVERSITEIT VAN AMSTERDAM","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229281","id":"896300","acronym":"STRETCH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Smart Textiles for RETrofitting and Monitoring of Cultural Heritage Buildings","startDate":"2020-09-01","endDate":"2022-08-31","projectUrl":"","objective":"This project aims to develop novel techniques using smart multifunctional materials for the combined seismic-plus-energy retrofitting, and Structural Health Monitoring (SHM) of the European cultural heritage buildings (CHB). The need for upgrading the existing old and CHB is becoming increasingly important for the EU countries, due to: (1) their poor structural performance during recent earthquakes (e.g. Italy, Greece) or other natural hazards (e.g. extreme weather conditions) that have resulted in significant economic losses, and loss of human lives; and (2) their low energy performance which increases significantly their energy consumption (buildings are responsible for 40% of EU energy consumption). Moreover, the SHM of the existing buildings is crucial for assessing continuously their structural integrity and thus to provide information for planning cost effective and sustainable maintenance decisions. Since replacing the old buildings with new is not financially feasible, and even it is not allowed for CHB, their lifetime extension requires considering simultaneously both structural and energy retrofitting. It is noted that the annual cost of repair and maintenance of existing European building stock is estimated to be about 50% of the total construction budget, currently standing at more than €300 billion. To achieve cost effectiveness, STRETCH explores a novel approach, which integrates technical textile reinforcement with thermal insulation systems and strain sensors to provide simultaneous structural-plus-energy retrofitting combined with SHM, tailored for masonry cultural heritage building envelopes. The effectiveness of the proposed retrofitting system will be validated experimentally and analytically. 
Moreover, draft guidelines and recommendations for determining future research on the use of smart composite materials for the concurrent retrofitting (structural-plus-energy) and SHM of the existing cultural heritage buildings envelopes will be proposed.","totalCost":"183473,28","ecMaxContribution":"183473,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"JRC -JOINT RESEARCH CENTRE- EUROPEAN COMMISSION","coordinatorCountry":"BE","participants":"","participantCountries":"","subjects":""} -{"rcn":"229265","id":"892890","acronym":"RhythmicPrediction","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Rhythmic prediction in speech perception: are our brain waves in sync with our native language?","startDate":"2021-01-01","endDate":"2022-12-31","projectUrl":"","objective":"Speech has rhythmic properties that widely differ across languages. When we listen to foreign languages, we may perceive them to be more musical, or rather more rap-like than our own. Even if we are unaware of it, the rhythm and melody of language, i.e. prosody, reflects its linguistic structure. On the one hand, prosody emphasizes content words and new information with stress and accents. On the other hand, it is aligned to phrase edges, marking them with boundary tones. Prosody hence helps the listener to focus on important words and to chunk sentences into phrases, and phrases into words. In fact, prosody is even used predictively, for instance to time the onset of the next word, the next piece of new information, or the total remaining length of the utterance, so the listener can seamlessly start their own speaking turn. \nSo, the listener, or rather their brain, is actively predicting when important speech events will happen, using prosody. How prosodic rhythms are exploited to predict speech timing, however, is unclear. No link between prosody and neural predictive processing has yet been empirically made. One hypothesis is that rhythm, such as the alternation of stressed and unstressed syllables, helps listeners time their attention. Similar behavior is best captured by the notion of an internal oscillator which can be set straight by attentional spikes. While neuroscientific evidence for the relation of neural oscillators to speech processing is starting to emerge, no link to the use of prosody nor predictive listening exists, yet. Furthermore, it is still unknown how native language knowledge affects cortical oscillations, and how oscillations are affected by cross-linguistic differences in rhythmic structure. 
The current project combines the standing knowledge of prosodic typology with the recent advances in neuroscience on cortical oscillations, to investigate the role of internal oscillators on native prosody perception, and active speech prediction.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE DE GENEVE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229235","id":"886828","acronym":"ASAP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Advanced Solutions for Asphalt Pavements","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"The Advanced Solutions for Asphalt Pavements (ASAP) project involves the development of a unique road paving technology which will use a bio-bitumen rejuvenator to rejuvenate aged asphalt bitumen. This technology will help to extend the lifespan of asphalt pavements (roads) and will reduce the environmental and economic impact of roads and road maintenance processes. Recycling and self-healing processes will replace fossil fuel dependent technology. Self-healing will involve rejuvenating aged asphalt bitumen using a bio-rejuvenator developed using microalgae oils (rejuvenating bio-oil). Microalgae has been selected because of its fast growth, versatility and ability to survive within hostile environments, such as wastewater. \n\nASAP will utilise microalgae, cultivated within the wastewater treatment process, as a source of the rejuvenating bio-oil. The solvent (Soxhlet) processes will be used to extract the oil from the microalgae. To ensure the efficiency of the oil extraction process, an ultrasonication process will be used to pre-treat the microalgae. The suitability of rejuvenating bio-oil as a replacement for the bitumen rejuvenator (fossil fuel based) will be ascertained via a series of standard bituminous and accelerated tests. A rejuvenator-binder diffusion numerical model will be developed, based on the Delft Lattice concrete diffusion model, to determine the conditions required for rejuvenation to occur and to ascertain the healing rate of the asphalt binder. These parameters will facilitate the selection and optimisation of the asphalt self-healing systems (specifically the amount of bio-oil rejuvenator and time required) to achieve full rejuvenation. \n\nThis novel approach will benchmark the effectiveness of this intervention against existing asphalt design and maintenance processes and assess feasibility. The ASAP project presents an opportunity to revolutionise road design and maintenance processes and reduce its environmental and financial costs.","totalCost":"187572,48","ecMaxContribution":"187572,48","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"NEDERLANDSE ORGANISATIE VOOR TOEGEPAST NATUURWETENSCHAPPELIJK ONDERZOEK TNO","coordinatorCountry":"NL","participants":"","participantCountries":"","subjects":""} -{"rcn":"229236","id":"886776","acronym":"BIOBESTicide","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4","frameworkProgramme":"H2020","title":"BIO-Based pESTicides production for sustainable agriculture management plan","startDate":"2020-05-01","endDate":"2023-04-30","projectUrl":"","objective":"The BIOBESTicide project will validate and demonstrate the production of an effective and cost-efficient biopesticide. 
The demonstration will be based on an innovative bio-based value chain starting from the valorisation of sustainable biomasses, i.e. beet pulp and sugar molasses and will exploit the properties of the oomycete Pythium oligandrum strain I-5180 to increase natural plant defenses, to produce an highly effective and eco-friendly biopesticide solution for vine plants protection. \nBIOVITIS, the project coordinator, has developed, at laboratory level (TRL4), an effective method to biocontrol one of the major causes of worldwide vineyards destruction, the Grapevine Trunk Diseases (GTDs). The protection system is based on the oomycete Pythium oligandrum strain I-5180 that, at applied at optimal time and concentration, colonises the root of vines and stimulates the natural plant defences against GTDs, providing a protection that ranges between 40% and 60%. \nBIOBESTicide project will respond to the increasing demands for innovative solutions for crop protection agents, transferring the technology to a DEMO Plant able to produce more than 10 T of a high-quality oomycete-based biopesticide product per year (TRL7). \nThe BIOBESTicide project will validate the efficiency of the formulated product on vineyards of different geographical areas.\nTo assure the safety of products under both health and environmental points of view, a full and complete approval dossier for Pythium oligandrum strain I-5180 will be submitted in all the European countries. \nA Life Cycle Sustainability Assessment (LCSA) will be conducted to assess the environmental, economic and social impacts of the developed products.\nThe adoption of the effective and cost-efficient biopesticide will have significant impacts with a potential ROI of 30 % in just 5 years and a total EBITDA of more than € 6,400,000.","totalCost":"4402772,5","ecMaxContribution":"3069653","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"BIOVITIS","coordinatorCountry":"FR","participants":"MERCIER FRERES SARL;FUNDACION TECNALIA RESEARCH & INNOVATION;LAMBERTI SPA;EURION CONSULTING;CIAOTECH Srl;STOWARZYSZENIE ZACHODNIOPOMORSKI KLASTER CHEMICZNY ZIELONA CHEMIA;NORDZUCKER AG;INSTITUT NATIONAL DE RECHERCHE POUR L'AGRICULTURE, L'ALIMENTATION ET L'ENVIRONNEMENT;INSTITUT FRANCAIS DE LA VIGNE ET DU VIN","participantCountries":"FR;ES;IT;PL;DE","subjects":""} -{"rcn":"229276","id":"895426","acronym":"DisMoBoH","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Dissecting the molecular building principles of locally formed transcriptional hubs","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Numerous DNA variants have already been identified that modulate inter-individual molecular traits – most prominently gene expression. However, since finding mechanistic interpretations relating genotype to phenotype has proven challenging, the focus has shifted to higher-order regulatory features, i.e. chromatin accessibility, transcription factor (TF) binding and 3D chromatin interactions. This revealed at least two enhancer types: “lead” enhancers in which the presence of genetic variants modulates the activity of entire chromatin domains, and “dependent” ones in which variants induce subtle changes, affecting DNA accessibility, but not transcription. 
Although cell type-specific TFs are likely important, it remains unclear which sequence features are required to establish such enhancer hierarchies, and under which circumstances genetic variation results in altered enhancer-promoter contacts and differential gene expression. Here, we propose to investigate the molecular mechanisms that link DNA variation to TF binding, chromatin topology, and gene expression response. We will leverage data on enhancer hierarchy and sequence-specific TF binding to identify the sequence signatures that define “lead” enhancers. The results will guide the design of a synthetic locus that serves as an in vivo platform to systematically vary the building blocks of local transcriptional units: i) DNA sequence – including variations in TF binding site affinity and syntax, ii) molecular interactions between TFs, and iii) chromatin conformation. To validate our findings, we will perform optical reconstruction of chromatin architecture for a select number of DNA variants. By simultaneously perturbing co-dependent features, this proposal will provide novel mechanistic insights into the formation of local transcriptional hubs.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-RI","coordinator":"ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229288","id":"898218","acronym":"devUTRs","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Uncovering the roles of 5′UTRs in translational control during early zebrafish development","startDate":"2021-09-01","endDate":"2023-08-31","projectUrl":"","objective":"Following fertilisation, metazoan embryos are transcriptionally silent, and embryogenesis is controlled by maternally deposited factors. Developmental progression requires the synthesis of new mRNAs and proteins in a coordinated fashion. Many posttranscriptional mechanisms regulate the fate of maternal mRNAs, but it is less understood how translational control shapes early embryogenesis. In eukaryotes, translation starts at the mRNA 5′ end, consisting of the 5′ cap and 5′ untranslated region (UTR). Protein synthesis is primarily regulated at the translation initiation step by elements within the 5′UTR. However, the role of 5′UTRs in regulating the dynamics of mRNA translation during vertebrate embryogenesis remains unexplored. For example, all vertebrate ribosomal protein (RP) mRNAs harbor a conserved terminal oligopyrimidine tract (TOP) in their 5′UTR. RP levels must be tightly controlled to ensure proper organismal development, but if and how the TOP motif mediates RP mRNA translational regulation during embryogenesis is unclear. Overall, we lack a systematic understanding of the regulatory information contained in 5′UTRs. In this work, I aim to uncover the 5′UTR in vivo rules for mRNA translational regulation during zebrafish embryogenesis. I propose to apply imaging and biochemical approaches to characterise the role of the TOP motif in RP mRNA translational regulation during embryogenesis and identify the trans-acting factor(s) that bind(s) to it (Aim 1). To systematically assess the contribution of 5′UTRs to mRNA translational regulation during zebrafish embryogenesis, I will couple a massively parallel reporter assay of 5′UTRs to polysome profiling (Aim 2). 
By integrating the translational behaviour of 5′UTR reporters throughout embryogenesis with sequence-based regression models, I anticipate to uncover novel cis-regulatory elements in 5′UTRs with developmental roles.","totalCost":"191149,44","ecMaxContribution":"191149,44","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITAT BASEL","coordinatorCountry":"CH","participants":"","participantCountries":"","subjects":""} -{"rcn":"229261","id":"893787","acronym":"HOLYHOST","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Welfare and Hosting buildings in the “Holy Land” between the 4th and the 7th c. AD","startDate":"2020-10-01","endDate":"2022-09-30","projectUrl":"","objective":"Between the 4th and the 7th century AD, many hospices dedicated to the poor, elderly, strangers and travelers were built in the countryside, along roads, around and inside cities. They were commissioned by the Church, rich pious men and women concerned by the redeem of their sins, as well as emperors who saw this as a guarantee of social stability. Welfare is thus an important phenomena of Late Antiquity, abundantly mentioned by ancient literary sources and inscriptions, particularly in the eastern part of the Empire. However, the buildings that provided shelter and care to the needy have not yet received sufficient attention from archaeologists. Except for buildings which were identified by their inventors as hostels dedicated to pilgrims, they are still invisible in the field. \nThe aim of the HOLYHOST research project is to bring this social history’s main topic on the field of archaeology. It will address the welfare issue through the archaeological and architectural survey and study of Ancient welfare and hosting establishments’ remains, in the Holy Land (Palestine and Jordan) and around. This work will contribute to a better understanding of the practices linked to hospitality, welfare, accommodation and care in Antiquity. Moreover, such establishments served as models for medieval and modern Islamic, Jewish and Christian waqf institutions (religious endowment), and welfare continues to be highly relevant nowadays, through issues still at the heart of contemporary challenges debated in Europe: poverty, social exclusion, migrant crisis, principle of reception and hospitality. This interdisciplinary and diachronic research project will thus offer many new research perspectives, in terms of history of architecture, evolution of care practices, social and political regulations.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSITE PARIS I PANTHEON-SORBONNE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229282","id":"896189","acronym":"MICADO","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Microbial contribution to continental wetland carbon budget","startDate":"2021-01-04","endDate":"2023-01-03","projectUrl":"","objective":"Continental wetlands are major carbon dioxide sinks but the second largest source of methane. Monitoring of wetland methane emissions revealed large inter-site variability that is hard to explain in the framework of current biogeochemical theories. 
Methane production in wetlands is an anaerobic microbial driven process involving a complex set of microbial metabolisms depending on the availability of (i) energy (via the presence of specific redox couples), (ii) organic substrates and (iii) specific microbial communities. To understand the complexity of microbial drivers on wetland methane emissions and quantify their contribution, the MICADO project will set up a multidisciplinary approach linking isotope organic geochemistry and environmental microbiology to assess microbial functioning in situ. As an organic geochemist I have developed an innovative approach to trace in situ microbial activity via compound specific carbon isotope analysis of microbe macromolecules and organic metabolites. The host institution is a leader in France in environmental microbiology and biogeochemistry developing high-throughput metagenomics and microbial rate assessments, for which I will be trained during the MICADO project. These techniques are highly complementary and combined they will provide a comprehensive knowledge on microbial metabolisms involved in organic matter degradation encompassing their complexity and interactions. This will revisit the relationships between organic substrate availability and microbial communities and will contribute at estimating the impact of microbial activity on wetland methane emissions. This project will give me the opportunity to acquire fundamental knowledge and to develop original lines of research that will consolidate my position as an independent scientist in biogeochemistry.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"CENTRE NATIONAL DE LA RECHERCHE SCIENTIFIQUE CNRS","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229249","id":"891624","acronym":"CuTAN","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"Copper-Catalyzed Multicomponent Reactions in Tandem Processes for Target Molecule Synthesis","startDate":"2021-02-01","endDate":"2023-01-31","projectUrl":"","objective":"The invention of processes that can form several bonds, stereocentres and rings in a single process is key to a sustainable future in synthetic chemistry. Multicomponent reactions and tandem procedures are two strategies that enable the rapid build-up of molecular complexity from simple reagents. By combining these two strategies into a single procedure, the diversity, complexity and value of products can be further enhanced along with the efficiency and economy of their construction. In this project, Dr Satpathi will develop novel copper-catalyzed multicomponent couplings of unsaturated hydrocarbons (e.g. allenes, enynes) with imines and boron reagents. These procedures will provide high-value amine products with universally high regio-, diastero- and enantiocontrol. The products will bear a variety of synthetic handles, for example, amino, alkynyl/alkenyl, and boryl groups, thus the products are primed for subsequent transformation. Dr Satpathi will exploit this functionality in tandem intramolecular couplings (e.g. intramolecular Suzuki/Buchwald-Hartwig reactions) to provide core cyclic structures of drug molecules and natural products. Thus, through a tandem procedure of; 1) copper-catalyzed borofunctionalization, and; 2) subsequent transition-metal catalyzed cyclization, he will gain efficient access to highly sought-after complex molecules. 
Overall, the process will provide high-value, chiral, cyclic motifs from abundant, achiral, linear substrates. Finally, Dr Satpathi has identified the phthalide-isoquinoline family of alkaloids as target molecules to display the power of his tandem methodology. Dr Satpathi has devised a novel route, which begins with our tandem multifunctionalization/cyclization reaction, to provide a range of these important alkaloids. The chosen alkaloids are of particular interest as they display a range of bioactivities – for example as natural products, receptor antagonists and on-market drugs.","totalCost":"212933,76","ecMaxContribution":"212933,76","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"THE UNIVERSITY OF MANCHESTER","coordinatorCountry":"UK","participants":"","participantCountries":"","subjects":""} -{"rcn":"229239","id":"887259","acronym":"ALEHOOP","status":"SIGNED","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3","frameworkProgramme":"H2020","title":"Biorefineries for the valorisation of macroalgal residual biomass and legume processing by-products to obtain new protein value chains for high-value food and feed applications","startDate":"2020-06-01","endDate":"2024-05-31","projectUrl":"","objective":"ALEHOOP provides the demonstration at pilot scale of both sustainable macroalgae and legume-based biorefineries for the recovery of low-cost dietary proteins from alga-based and plant residual biomass and their validation to meet market requirements of consumers and industry in the food and feed sectors. In these sectors, consumers are demanding affordable functional natural proteins from alternative sources and industry is demanding low-cost bio-based protein formulations with better performance and higher sustainability. \nCurrent protein demand for the 7.3 billion inhabitants of the world is approximately 202 Mt. Due to the rise in meat consumption more proteins are therefore required for animal feeding. To satisfy the current protein demand, Europe imports over 30 Mt of soy from the Americas each year mainly for animal feeding, entailing 95% dependency of EU on imported soy. Current sources of proteins are becoming unsustainable from an economic and environmental perspective for Europe resulting in concerns for sustainability and food security and leading to search for new alternative proteins. \nALEHOOP addresses the obtaining of proteins from green macroalgal blooms, brown seaweed by-products from algae processors and legume processing by-products (peas, lupines, beans and lentils) as alternative protein sources for animal feeding (case of green seaweed) and food applications (case of brown seaweed and legume by-products), since they are low cost and under-exploited biomass that do not compete with traditional food crops for space and resources. This will reduce EU´s dependency on protein imports and contribute to our raw material security. 
The new proteins will be validated in foods for elderly, sporty and overweight people, vegetarians and healthy consumers as well as for animal feed creating cross-sectorial interconnection between these value chains and supporting the projected business plan.","totalCost":"6718370","ecMaxContribution":"5140274,41","call":"H2020-BBI-JTI-2019","fundingScheme":"BBI-IA-DEMO","coordinator":"CONTACTICA S.L.","coordinatorCountry":"ES","participants":"CENTIV GMBH;ALGINOR ASA;FUNDACION TECNALIA RESEARCH & INNOVATION;INDUKERN,S.A.;ASOCIACION NACIONAL DE FABRICANTES DE CONSERVAS DE PESCADOS Y MARISCOS-CENTRO TECNICO NACIONAL DE CONSERVACION DE PRODUCTOS DE LA PESCA;BIOZOON GMBH;EIGEN VERMOGEN VAN HET INSTITUUT VOOR LANDBOUW- EN VISSERIJONDERZOEK;BIOSURYA SL;VYZKUMNY USTAV VETERINARNIHO LEKARSTVI;NUTRITION SCIENCES;TECHNOLOGICAL UNIVERSITY DUBLIN;GARLAN, S.COOP.;ISANATUR SPAIN SL;UNIVERSIDAD DE VIGO;UNIVERSIDAD DE CADIZ","participantCountries":"DE;NO;ES;BE;CZ;IE","subjects":""} -{"rcn":"229258","id":"892834","acronym":"DENVPOC","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"qPCR Microfluidics point-of-care platform for dengue diagnosis","startDate":"2020-05-18","endDate":"2022-05-17","projectUrl":"","objective":"As a result of Global climate change and fast urbanization, global outbreaks of Dengue (DENV)/ Zika(ZIKV)/Chikungunya(CHIKV) virus have the potential to occur. The most common pathway of these infections in humans is through the female Aedes mosquito vector. DENV is an exanthematous febrile disease with varied clinical manifestations and progressions . Due to similarities in symptoms between DENV and ZIKV and CHIKV, it is difficult to make a differential diagnosis, impeding appropriate, timely medical intervention. Furthermore, cross-reactivity with ZIKV, which was recently related to microcephaly, is a serious issue. In 2016, in Brazil alone, there were 4180 microcephaly cases reported instead of 163 cases, more in line with yearly expected projections , , Thus, the sooner an accurate diagnostic which differentiates DENV from the other manifestations is critical; most especially at the early stages of the infection, to have a reliable diagnosis in pregnant women. In 2016, the OMS emergency committee declared that the outbreaks and the potentially resultant neurological disorders in Brazil were an important international state of emergency in public health, as a result of the associated secondary effects; these diseases became a Global concern. This project allows developing a highly and fast Multiplex qPCR POC platform by using FASTGENE technology with a minimal amount of patient serotype. It would reduce the time of analysis (30 to 90’ for a standard) and costs. Additionally, the sample preprocessing and thermalization will shorten real-time PCR amplification time and will be integrated within the microfluidic systems. This platform can result in a commercialized product whereupon a main market target would be pregnant women and people living or traveling through/from outbreak risk areas.","totalCost":"196707,84","ecMaxContribution":"196707,84","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-SE","coordinator":"BFORCURE","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229280","id":"895716","acronym":"DoMiCoP","status":"SIGNED","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019","frameworkProgramme":"H2020","title":"The Diffusion of Migration Control Practice. 
Actors, Processes and Effects.","startDate":"2021-03-01","endDate":"2023-02-28","projectUrl":"","objective":"DoMiCoP develops new understandings and perspectives to study migration control in practice in the European Union by asking one main question: how and why do communities of practice develop and diffuse the knowledge required to put migration control into action? Unlike the nexus between expert knowledge, epistemic communities and policy formulation, the nexus between everyday knowledge, communities of practice and policy implementation has not yet received systematic scholarly attention. My project bridges that gap by focusing on intermediate arenas in which communities of practice take shape most notably the meetings and trainings that gather state and non-state actors involved in putting asylum, detention and removal into practice. By building on field-based methodologies (interviews and participant observations), DoMiCoP sheds ethnographic light on the role that ‘learning from abroad’ plays in the implementation of migration control in the EU. My project’s aim is threefold: 1) Identifying arenas at intermediate levels in which communities of practice take shape; 2) Analysing the communities of practice by focusing on the configurations of actors and organizations involved, the motivations underlying their involvement, the process of knowledge development in interaction, the conflicts and negotiations; 3) Revealing the role of non-state organizations (private for profit and not-for-profit). From a theoretical point of view, this project goes beyond the classical view of the implementation as a test to assess the effectiveness of policy transfers towards an analysis of policy transfer at that level of policy-making. From an empirical point of view, the project expands knowledge about less-studied venues of policy-making and provides original thick descriptions. From a methodological point of view, the project engages with qualitative methods for the study of policy diffusion and aims at responding to their main challenges through participant observation.","totalCost":"163673,28","ecMaxContribution":"163673,28","call":"H2020-MSCA-IF-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"EUROPEAN UNIVERSITY INSTITUTE","coordinatorCountry":"IT","participants":"","participantCountries":"","subjects":""} -{"rcn":"229297","id":"954782","acronym":"MiniLLock","status":"SIGNED","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020","frameworkProgramme":"H2020","title":"Mini Launch Lock devices for small satellites","startDate":"2020-05-01","endDate":"2022-04-30","projectUrl":"","objective":"Space industry is experiencing the most important paradigm shift in its history with the rise of small satellites and megaconstellations.\nSatellite miniaturization requires to reduce significantly production and orbit launching costs. To address the\nnew challenge of this manufacturing process and switch from craftsmanship to industrialization, space industry is turning\ntowards other domains looking for new solutions, disruptive technologies, and manufacturing process.\nMini Launch Lock devices for small satellites (MiniLLock) proposes innovative actuators on the cutting edge of customer\ndemand. 
They offer plug and play solutions that can directly be integrated into industry for satellites robotized production.\nMiniLLock is smaller, lighter, safer, with a longer lifetime and generates significantly less shocks and vibrations than\nstandard actuators such as electromagnet and pyrotechnics. MiniLLock offers performances which have never been reached\nwith any other materials.\nNimesis is the only company that can provide such cost-effective actuators suitable to small satellite with high performances\nand reliability, enabling features previously impossible.\nMiniLLock will accelerate and leverage the commercialization of Nimesis technology and ensure Europe worldwide\nleadership\nand independence in the new space emergent environment.\nNimesis ambitions to become the global leader of this domain with a turnover of € 26 million and a market share of 28% in\n2027.","totalCost":"2413543,75","ecMaxContribution":"1689480,63","call":"H2020-EIC-SMEInst-2018-2020-3","fundingScheme":"SME-2b","coordinator":"NIMESIS TECHNOLOGY SARL","coordinatorCountry":"FR","participants":"","participantCountries":"","subjects":""} -{"rcn":"229299","id":"101003374","acronym":"NOPHOS","status":"SIGNED","programme":"H2020-EU.4.","topics":"WF-02-2019","frameworkProgramme":"H2020","title":"Unravelling protein phosphorylation mechanisms and phosphoproteome changes under nitrosative stress conditions in E.coli","startDate":"2020-07-01","endDate":"2022-06-30","projectUrl":"","objective":"Currently, we face a global antibiotic resistance crisis aggravated by the slow development of more effective and anti-resistance promoting therapeutical solutions. Protein phosphorylation (PP) has recently emerged as one of the major post-translational modification in bacteria, involved in the regulation of multiple physiological processes. In this MSCA individual fellowship application we aim to bridge the current gap in the field for prokaryotes by unravelling the unknown regulatory role of PP on proteins involved in nitrosative stress (NS) detoxification in the model bacterium E.coli. We propose to examine for the first time both global protein modifications (e.g. phosphoproteomics) under nitrogen species stress, as well as characterize PP in individual proteins involved in NS response. We will construct a network model that reflect the phosphoproteomic changes upon NS in E.coli, that may pave the way for the design of new bacterial targets. Understanding how bacteria respond to the chemical weapons of the human innate system is fundamental to develop efficient therapies. We will pioneer research on the mechanism and the regulation of nitric oxide detoxification proteins already identified as phosphorylated, by analyzing how this modification influences their stability and activity in vitro and in vivo. This project opens up new research paths on bacterial detoxification systems and signalling in general, addressing for the first time the role of PP in these processes. 
The proposal brings together transversal and scientific skills that will enable the researcher to lead the development of this emerging field and position herself as an expert in the area, and aims at establishing the importance of PP in NO microbial response, a novelty in this field.","totalCost":"147815,04","ecMaxContribution":"147815,04","call":"H2020-WF-02-2019","fundingScheme":"MSCA-IF-EF-ST","coordinator":"UNIVERSIDADE NOVA DE LISBOA","coordinatorCountry":"PT","participants":"","participantCountries":"","subjects":""} \ No newline at end of file +{"id":"894593","programme":"H2020-EU.3.4.7.","topics":"SESAR-ER4-31-2019"} +{"id":"897004","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896300","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"892890","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886828","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"886776","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D4"} +{"id":"895426","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"898218","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"893787","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"896189","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"891624","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"887259","programme":"H2020-EU.2.1.4.;H2020-EU.3.2.6.","topics":"BBI-2019-SO3-D3"} +{"id":"892834","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"895716","programme":"H2020-EU.1.3.2.","topics":"MSCA-IF-2019"} +{"id":"954782","programme":"H2020-EU.3.;H2020-EU.2.3.;H2020-EU.2.1.","topics":"EIC-SMEInst-2018-2020"} +{"id":"101003374","programme":"H2020-EU.4.","topics":"WF-02-2019"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz index 71f132ad16d88afcbba31ef4c7c6206c8e698c1d..5672d6044f2b502ef499f0c272222f5b9435fc19 100644 GIT binary patch literal 29456 zcmV(zK<2+6iwFpTP!(YS18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUD@^OOGST zl_t8szrs+9nYi2)YN5P>78iBVi&3q_j6b4Za?d$t?r!Ez#?zcNXs9cL52qb_96OKieCNM> z6sS;q^uPjRrj|0I*DxsHn zc!ht@M4T}-Vu?uSDo*7v68LYylVHMl95NZl>WXJl#ca4_Jc?MF@u`@oC=^NBVT*}` zPs)^~0=}NfE5S0wQWZ#%Em{r)-q)nOLY~O5@uy z01z&ns3bc#62jjRkHb4z4W4DEUB z#evEb_zf0-cq@<+1r+fZUy?CPK~kL9Y?7#aJV60wJTgN!e-xh)VXnu}apnQnBb1|2 zp5ipaYM9UO4S6a;#sipE_-z5x9m70DOZ#^NSV5RDdSEdI1y4*EpTiH#6UpHxf(egf z5v4nuL$Pfy@*oiKq@j!?Y!E$a!SYm~;P_7vCXccd|D5N;NCwezZG3{QdeohKsTIsTF2(ri0G)k4x)iZdC=IlL8*>60)toEi9Q4!Z*0 zg(r*nN+v4CW6Glrn+P6d`nf3|;R%zoIXrfE>pHY&vtN9e6Cf=R3*HHJrm{5%n$L>bSrghorcbE$HgraXji zrdm^g9!WEp;WP*BD`SpoT7vZSHB`+aTL^(lVWLz7U)nf%9r!DRCnWmZ1u#8PsjBVi z-`$J__KfvGqH%)isu+}|=W1t3rfH4}ajrl^NTdI(0|wtz32sW5QCU8>P~lJX?1?nf zU%cW`Ug%K#e2@rGy|l~Bp2jJS1l^e(8XoF+P-XLsTdJ&FAN`WE9o_M(JPXuJ5A=#B zQsr<9N)d&m_>3S+xER%}_&Mv_Fi4I=gdfC5gFca}+*^7maNS!k?QB6O&VtTBOC@~rdIw&z{N}MR`5Tw>AphK79=>gRwP}j^v zD0z9^=!<1Xlk(iX3T-EA|rUjA}c9ZU8e( zv4{s#*bt+UB>Ca(D-~U76Avc0m&27Mz~<6l-Sm4qnvhRnpFIEM*@j6e_c*M*bF#HU zDDRA2jr6%|QxCw>ipQ{lVq86KM$aT@P0-&oW*jO^a#Boez~Oc_ngBi%r_CjXcaDn} z_u)o^&CE^}tk3Rbbnf)?1*`#zx9tk959=;pXlLBcb$c%O%#5OWVwyHYq!tVo3JrAp z*4aTHK0MwiU-jTSKdyRIJCZvdN*DGE9F-+24$&d(IaHZ-Nw$29eSA7N{7GwLb*}gx z+>fAo(+SSf-+uMG-dUIJPq<9dzy0dl`grlpfZlBFy0Q=Ve!q9T-(zq4=dW1z_<$Xr zoUy?Pyn6m+ePjc3gDs@7#!*oZw9(SqMA{Bj|M2kS_@H;zV{e}H_V;^l-t>;&by<&f 
zj2KN=b1Z)rVZUNXt!yo+Z&)E?##aQBIGs@d)6&DR&j0d^M2)X5+Lg}Pb@feSG520B zaDrp3!4Al$7Gv?ny+LCL1IJJSDAn7Sz+>othw`KyQYpB4kQ7)#!!hLN&|}(!4x-%- z_@RiXP0isd2U>4wDVa0W}<#xIPRun1I2hhpT^QPl#nL1gF==u?_p#g2(_*qATC_U z(|pJmJQ=2=;%*ELri&@d2nW5XDZd|#+C$LT3tuQQnoiMxzq|q%@~QGza89ahlr|yI zP0M{i3_WzpdO!>-R2{j{`cT0ptWAGJ-oXv4YNKyW7ePewl2`3J*WVl;u&8w%&Bo~5 zYs~~kO2$@WvL<`nN^U9^INJ!T3+cUX?F{p>#d#~3f}xDuWYjKb2M?K%YY6v>-Abo@ z%{8%Nt^Z)jtlA`)jQXuQS> z6j$@E^2bpAhszL{{{ovRus<<~Dj8rxT5`05Xx$$I(#Jrv)8P4qatX1S5!aMK(t*+( zX+9)|3^gx075vsCS%!~BuOsrHgaj>?sLADq>4f^1+gg>q0S7+naO)a!_Mvuu`OWzm zc)WwJkFJh_v!gfPoWDJi>Bos(d}C!%a=jAlNxjbI-5OW978Tp4bD6S;Q3d6Mc-Fxh55z%^@{jrTm?=tE s(p@IqMHok20RB5Pm;e9( From a5f6edfa6c2edf9ecff17e5781315fff20d024d0 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 09:30:03 +0200 Subject: [PATCH 090/166] GetCSV refactoring - changed to mirror the original model class --- .../common/collection/models/CSVProject.java | 39 ------------------- 1 file changed, 39 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 19606a09e..0d939ca0e 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -13,23 +13,12 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "id") private String id; - @CsvBindByName(column = "status") - private String status; - @CsvBindByName(column = "programme") private String programme; @CsvBindByName(column = "topics") private String topics; - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "call") - private String call; - - @CsvBindByName(column = "subjects") - private String subjects; public String getId() { return id; @@ -39,13 +28,7 @@ public class CSVProject implements Serializable { this.id = id; } - public String getStatus() { - return status; - } - public void setStatus(String status) { - this.status = status; - } public String getProgramme() { return programme; @@ -63,28 +46,6 @@ public class CSVProject implements Serializable { this.topics = topics; } - public String getTitle() { - return title; - } - public void setTitle(String title) { - this.title = title; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } } From f3d575f749043ecf12f8b44ea18722dd684d39ef Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:03:57 +0200 Subject: [PATCH 091/166] GetCSV refactoring - changed due to changes in input resource --- .../dhp/common/collection/GetCSVTest.java | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index bf5e3dedb..292fa4666 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -26,15 +26,6 @@ public class GetCSVTest { private static LocalFileSystem fs; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs 
= FileSystem.getLocal(new Configuration()); - } - @Disabled @Test void getProgrammeFileTest() throws Exception { @@ -42,11 +33,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -57,39 +48,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -100,6 +91,15 @@ public class GetCSVTest { Assertions.assertEquals(767, count); } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(GetCSVTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + @Disabled @Test void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { @@ -217,29 +217,29 @@ public class GetCSVTest { while ((line = in.readLine()) != 
null) { DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); if (count == 0) { - Assertions.assertTrue(doaj.getIssn().equals("0001-3765")); - Assertions.assertTrue(doaj.getEissn().equals("1678-2690")); - Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências")); + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); } - if (count == 7902) { - - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2055-7159")); - Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports")); + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } - if (count == 16703) { + if (count == 16707) { - Assertions.assertTrue(doaj.getIssn().equals("")); - Assertions.assertTrue(doaj.getEissn().equals("2788-6298")); + Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("2788-6298",doaj.getEissn()); Assertions - .assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa")); + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } count += 1; } - Assertions.assertEquals(16709, count); + Assertions.assertEquals(16713, count); } } From 58f241f4a2abc0aba95b56248d2bd59b2507b324 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:04:44 +0200 Subject: [PATCH 092/166] GetCSV refactoring - changed due to change of input resource --- .../test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index 292fa4666..d24083e0d 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; +import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; From 5cd57145303bbf65ecdfbde572f874ffe28a54a8 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:06:49 +0200 Subject: [PATCH 093/166] GetCSV refactoring - added ignore annotation for fields not in input csv --- .../dhp/actionmanager/project/utils/model/CSVProgramme.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java index ea89a75eb..df06fd6b4 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java @@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.project.utils.model; import java.io.Serializable; import com.opencsv.bean.CsvBindByName; 
+import com.opencsv.bean.CsvIgnore; /** * The model for the programme csv file @@ -22,10 +23,10 @@ public class CSVProgramme implements Serializable { @CsvBindByName(column = "language") private String language; - @CsvBindByName(column = "classification") + @CsvIgnore private String classification; - @CsvBindByName(column = "classification_short") + @CsvIgnore private String classification_short; public String getClassification_short() { From 5f674efb0c36201c797ef282b1b2b66724ebb3c3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:07:53 +0200 Subject: [PATCH 094/166] moved dependency version in external pom --- dhp-workflows/dhp-enrichment/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-enrichment/pom.xml b/dhp-workflows/dhp-enrichment/pom.xml index 6f638b188..644ac2140 100644 --- a/dhp-workflows/dhp-enrichment/pom.xml +++ b/dhp-workflows/dhp-enrichment/pom.xml @@ -47,7 +47,6 @@ io.github.classgraph classgraph - 4.8.71 From eaf077fc34bf43f8b2b67ef5a9a701fd78a1eadb Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:08:58 +0200 Subject: [PATCH 095/166] GetCSV refactoring - removed not needed dependency --- dhp-workflows/dhp-graph-mapper/pom.xml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/pom.xml b/dhp-workflows/dhp-graph-mapper/pom.xml index 6c5e4609a..17146903a 100644 --- a/dhp-workflows/dhp-graph-mapper/pom.xml +++ b/dhp-workflows/dhp-graph-mapper/pom.xml @@ -122,10 +122,7 @@ org.json4s json4s-jackson_2.11 - - com.opencsv - opencsv - + From 964a46ca210afa179b2a1735f603653f99a463dd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:11:18 +0200 Subject: [PATCH 096/166] GetCSV refactoring - modified due to movement of classes --- .../dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 71b20b356..26b1d5993 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,7 +100,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -112,7 +112,7 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV + eu.dnetlib.dhp.common.collection.GetCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 01db1f8bc403f1456d7004489af2291282721278 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:14:17 +0200 Subject: [PATCH 097/166] GetCSV refactoring - removed not needed import --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala index 2ed76a72a..5b00e9b6f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala +++ 
b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala @@ -1,6 +1,5 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap -import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap} import eu.dnetlib.dhp.schema.oaf.Datasource import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession} From dfd1e53c6982326338abca5b5a59f0d1e2b58d61 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:12 +0200 Subject: [PATCH 098/166] added external dependency for version --- pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d31ffb88d..a5e9f0547 100644 --- a/pom.xml +++ b/pom.xml @@ -524,7 +524,11 @@ opencsv 5.5 - + + io.github.classgraph + classgraph + 4.8.71 + From 32fd75691f17af421c8f5e7c4a8b4a0796c4cb9c Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:15:42 +0200 Subject: [PATCH 099/166] refactoring --- .../project/utils/CSVParser.java | 47 ---- .../project/utils/CSVProject.java | 200 ------------------ .../actionmanager/project/CSVParserTest.java | 31 --- .../project/preparedProgramme_whole.json.gz | Bin 15986 -> 0 bytes .../dhp/oa/graph/hostedbymap/GetCSV.java | 74 ++----- .../dhp/oa/graph/hostedbymap/TestReadCSV.java | 112 ---------- 6 files changed, 21 insertions(+), 443 deletions(-) delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java delete mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz delete mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java deleted file mode 100644 index c53cd2127..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java +++ /dev/null @@ -1,47 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVRecord; -import org.apache.commons.lang.reflect.FieldUtils; - -/** - * Reads a generic csv and maps it into classes that mirror its schema - */ -public class CSVParser { - - public List parse(String csvFile, String classForName) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - return parse(csvFile, classForName, ';'); - } - - public List parse(String csvFile, String classForName, char delimiter) - throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException { - final CSVFormat format = CSVFormat.EXCEL - .withHeader() - .withDelimiter(delimiter) - .withQuote('"') - .withTrim(); - List ret = new ArrayList<>(); - final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format); - final Set headers = 
parser.getHeaderMap().keySet(); - Class clazz = Class.forName(classForName); - for (CSVRecord csvRecord : parser.getRecords()) { - - @SuppressWarnings("unchecked") - final R cc = (R) clazz.newInstance(); - for (String header : headers) { - FieldUtils.writeField(cc, header, csvRecord.get(header), true); - - } - ret.add(cc); - } - - return ret; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java deleted file mode 100644 index 268d5f28c..000000000 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java +++ /dev/null @@ -1,200 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project.utils; - -import java.io.Serializable; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - private String rcn; - private String id; - private String acronym; - private String status; - private String programme; - private String topics; - private String frameworkProgramme; - private String title; - private String startDate; - private String endDate; - private String projectUrl; - private String objective; - private String totalCost; - private String ecMaxContribution; - private String call; - private String fundingScheme; - private String coordinator; - private String coordinatorCountry; - private String participants; - private String participantCountries; - private String subjects; - - public String getRcn() { - return rcn; - } - - public void setRcn(String rcn) { - this.rcn = rcn; - } - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getAcronym() { - return acronym; - } - - public void setAcronym(String acronym) { - this.acronym = acronym; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - - public String getFrameworkProgramme() { - return frameworkProgramme; - } - - public void setFrameworkProgramme(String frameworkProgramme) { - this.frameworkProgramme = frameworkProgramme; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getStartDate() { - return startDate; - } - - public void setStartDate(String startDate) { - this.startDate = startDate; - } - - public String getEndDate() { - return endDate; - } - - public void setEndDate(String endDate) { - this.endDate = endDate; - } - - public String getProjectUrl() { - return projectUrl; - } - - public void setProjectUrl(String projectUrl) { - this.projectUrl = projectUrl; - } - - public String getObjective() { - return objective; - } - - public void setObjective(String objective) { - this.objective = objective; - } - - public String getTotalCost() { - return totalCost; - } - - public void setTotalCost(String totalCost) { - this.totalCost = totalCost; - } - - public String getEcMaxContribution() { - return ecMaxContribution; - } - - public void setEcMaxContribution(String ecMaxContribution) { - this.ecMaxContribution = ecMaxContribution; - } - - public String getCall() { - return call; - } - - public 
void setCall(String call) { - this.call = call; - } - - public String getFundingScheme() { - return fundingScheme; - } - - public void setFundingScheme(String fundingScheme) { - this.fundingScheme = fundingScheme; - } - - public String getCoordinator() { - return coordinator; - } - - public void setCoordinator(String coordinator) { - this.coordinator = coordinator; - } - - public String getCoordinatorCountry() { - return coordinatorCountry; - } - - public void setCoordinatorCountry(String coordinatorCountry) { - this.coordinatorCountry = coordinatorCountry; - } - - public String getParticipants() { - return participants; - } - - public void setParticipants(String participants) { - this.participants = participants; - } - - public String getParticipantCountries() { - return participantCountries; - } - - public void setParticipantCountries(String participantCountries) { - this.participantCountries = participantCountries; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java deleted file mode 100644 index dd7e1910f..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java +++ /dev/null @@ -1,31 +0,0 @@ - -package eu.dnetlib.dhp.actionmanager.project; - -import java.util.List; - -import org.apache.commons.io.IOUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser; - -class CSVParserTest { - - @Test - void readProgrammeTest() throws Exception { - - String programmecsv = IOUtils - .toString( - getClass() - .getClassLoader() - .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv")); - - CSVParser csvParser = new CSVParser(); - - List pl = csvParser.parse(programmecsv, "eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme"); - - Assertions.assertEquals(24, pl.size()); - - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz deleted file mode 100644 index 620e1abfbf28cf33bc6307fc9ada17ce4a048a71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15986 zcmV-&K8?X2iwFpXn5JL=18{O>aA9&~WKeQ%XL4a}ZDn6~Xm4y~E^2dcZUF6l-E!MV zlJ5OJ1!69?JrShGq9n`hJ2p*?*<;HJQQnz}*oX}fERq<302@D2Yj!tcVqf80o%25X zBJ(6Cvwnc8ViiSz_#xfC=xB>1P+3`3S^4|rfBr7^!r#6B-P^a97iVvN*ZZ9p%?bSd zaqwpFra%7r`uz39>r40#zDoTx!0#saADp2dFJgC1;NxT!#pyl&4*m|@aGAMF{ELM6 z#=uPyf8l#>>PI-%H1d3sx`E@Z+#n#~k|fT5In%Wpr#e7TFbw=|^l3c2nf~do1NgT_i8|7MP{QQ#M)G)jz4O2S)9WVCRr04iB;1Y&TV|M|BJ^10$aba>PiQ_2tlQsM#^nz@TUu5BPR@=Rb43 zhXDT&zfT>1PTYikIU|8j7DIMQbm#8H<5 zv~b2*9Bl~w*iRG3^HcxtB*gc1z=={@U~9e|kgV*m^S5V~BY39nvCl3e#*aOq4UWwt zS@~W-*t;J|l8^wFA|P%^V*1%H8O#y>{RES8{PiY|9*IN3M?a3jH3^Y78xk*~c=FMFj$s)23H$2NpT%r1wSNBZND^g0kwb7zd>|dyhku=}+!W|eHn3+C z_)D20JbX-it1PX)5e_Ij|LpwAu=CGg=by{_{*L$|jO5`InA~wgoGo!?jK**`*{HLw>GtnxU9Wn0 zt1##C{NmhlBfM2_ggbWR@W8_fg?}Rfz;w8D;ErBo1XciOaFXXFC2Noj5nMq|mJnPg zNq)GSO~A=ddd}0z4+vgzG2C19Dq9ghb^?C^7v`GaTLSWk5(9n-bMagpbG-q90rDb+ z52El9=FPSMXcDV13ZkXIy-3Yp9M_|d1LsvS(`fcL)NDZH{hltz$?>o91@et?LOn3& 
zG6>GL@R(D7odxWgSFr#=eudQ0?8Q-%^cMsy|3H%7IbO8B;brzC7=C4xg@MQWr?jXGJ5)bRxT2_q>-Ri9l$K9|uuy1J&c4}_Z z>gg?Q_+0z#w<0?oo~YRi2DH@-tiNd8*V2VQ(|!T8kdz7#Rw%#)t5z7myIauA!cFvq zmA=e<2{M}ex(OmbW&N)FB)2mc8QrJ+Q4Y&fF~Gp5Kt;*ik3sDc$a*NOyPJsZ&N)hQ zRaNqxt$T9CSj1ExK@1 zS8CJgfn^GJtiNSnG=NCZctbauwN>A|n`sVLE0J20Ut8?#%-+vOD@j?HN=@*0bZF6` zC>Ee8;>Ymq--sDYwd45L{#|XG=1S{rdYhe)7Pd8~6I|*}tLrSLmMg5?HceZBMdk_) z>4^L{MXa%hU;D8P@t82*FwcMzF-iU=_$jS!z$`JQS_v&l2B2VN4Ta%zWZSv*aPBEC zSW98v(_@X6XKr>2UFg?^BuB-N^Ef4+U?BIFd$4nc7}fJ#qOd5I3XE`>?P7_ zK|9+$h=NC0?bilD^(b&LkpI_)9NwH%Z zjbq-34@Xg$BEa@7h7+oJU2gASqfc)B=#2j|8l!u5G^Vw0f1Z38-{9xB!@K**XmUHe zpM1HYan_dSWLu$444acVu9@=1OTM1aoksH}KyHB@y%B0t%ap5Xx6wJG6~~ArxOYS; zVjonM{1zXtNlY;UR4kM0@%>i){@zbjQquQ+dkJlM&v$B>EM{?XZYdNfGDdoZUyI%G zN!ph$`Xmb6Sc9KB=2VtLC)PqP%V8v{ve$@yaG@4gj}t^I@!{@*o%GLt{J-%X#enhS zE3!`ANRc;m;&;Vi7V+PiMRzWi4* z(B0v)Z5p1h5wf*Tg(lO9ImFEUG)y6v9N8BRiyD8Rkdjn84VnLFYm?kneoY6SGe_#8 zQ3E5|_QI(lr&7eKyT*_gin~S&9U8hO*9HwM;lG2Fn0_9cnkScMvGiKs{#xz2w}CRA zTVV?H4i8c?0Z5GJ{sK;FI0w5q-(>DDnaeMZQ&-BrhC$>#h`V+JvJAVv2lS?eSz_Ok zz^A!wMdi8@@jjG$E#BL=ZO=|acCl%qm#VjYL@Q#vEKL~&Ay$p?3=>(LM3FwPbfR^Z=Z)lGP0|Nc9AJ{)Z1O?x?u9uYzG zCT1D=MGBA$tf_w3Qh(WZX_10iOrOOgp3I4DJgxOM>wx&9gEjkdpSZPIePhInQL* zYgL{ZGiX_34F!pNJerO_)oBT})g(g@eyj~I7=2ojmvpBxSK;|~56@2uWUk_<+n3-L57EKY zYLOq4{&q&a9a`k)I&hapm!JM+^D!WKw59?<#Tc6N-RvHEt1|4){ zvOL3sQZ*y#6Y4+tGuV3vWjb?bxXz5@zbmk78<5eFv5<1uVTEH;*@#z2_7(#oy%kW?@aO zqH*dj2<)49&X$y2|kDA7-w5Q6~lN~h! z>MK4w;%TaP?>lqS=wJsXro~>;H_)EXoUTD^6$D=J4iw_DsK&xL9KDkKX3bai2FAeUZrz*8n0XU#dPS??}`dM(SakaWhM&`|(}{ZjS;G3CyQzy=ko8TdnmXSkO&aF7x#n3* z0xL?A5YZ@k@Pi;xV7rT}(5FRB0V-p@0ML@IM}3VykETOj2vs~&HSY8>0(q?ysTGLa z$OjEK(w3g3T>)H=an|s*C6gPXp(;ADy&$7%QIf*{Nx~HDMdW1(I*MT{VSzmJA5&ih z^ZlTO$X{}7iBo$cEw;dz*3lyypOc0TV)Oba^5Uxkv$x{X7{xA=x>T+)N!5+eFGpZC zAzo$-Eyv63fVfoiVs_`h9or#&iP%<>0(G6*)=EkP{kS2&rp^-Uzu5sppKOLF@_mtq z%k(HLYA?Q4;UFcgOFo%i5M}e?nNw7_TjtNPYU8c4L2FfF6U$ex&)z8M{76RJjnFO8 z9V92S@jfJcBn>m)tZ7|nRpLN|2!+D5sg+Mw?xPz#L3@Q+rh4 zIjZE^eK3fCJ;!O@pWNVtEBlW3WsZi=Z_A?AP>H753~z9%l`|uYC1s1GjoitMjU7p} z;c>Y|Oi+Pq-{XG}DF4BeZ48P3K*}@>bekEa7*XSp_X5QC%Z^HpnuJ*0QP}68FgP0i zSvd3bnu2XdYt`3usm3+Hm6{a4s{GV2C~X?yr54mSR@|{s`$ZH>zSy_EoMnE1S35@# zcWHHrQc=q7Gkxn+7(5@Vb#+oIR133(i#|y45S(L-vhXN-%N*^dRXv0Edd845`62)>!mh2BZdOMuIA`2AV(s1uoxdyvInQx07o#4{9GdcCH zy)xS+S_4Jl!@{Ne8p5zY!2bW0#zBKZ4GZ)->CrR7*0lrHpER$02p= zOC~sW&;gS<3aPe87lcXA_n^uw!7OCK08G&pLYk~+WIpGnx}-=U5nb ztgfBo;E#$ye`d#^R}s_iKRDCc@o}pl&MSn}gD)?=QfrU7qld=Dc5D-@0&GM}!zXz^ zp-PI&t|HIfV^t{|-StKd3d!v9(24^q008M++6!0b)N+Wb@#WaijI=Fb!5%zU<;Pt# z10Fkb;<1i%%xA|g?FwV z37V{~C)D}U$VDdBdk1(xjq|y`^yvX)FX$EZwD`0~i#ZF0eZEfB(z>2hC^44Ad clazz = Class.forName(classForName); - - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(clazz) - .withMultilineLimit(1) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - - writer.close(); in.close(); - if (shouldReplace) { + if (Boolean.TRUE.equals(shouldReplace)) { File f = new File("/tmp/DOAJ.csv"); f.delete(); } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java deleted file mode 100644 index 89259d814..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java +++ /dev/null @@ -1,112 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; 
-import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; -import java.util.List; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.opencsv.bean.CsvToBeanBuilder; - -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; - -public class TestReadCSV { - - @Test - public void testCSVUnibi() throws FileNotFoundException { - - final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv") - .getPath(); - - List beans = new CsvToBeanBuilder(new FileReader(sourcePath)) - .withType(UnibiGoldModel.class) - .build() - .parse(); - - Assertions.assertEquals(36, beans.size()); - Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count()); - Assertions - .assertTrue( - beans - .stream() - .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica"))); - Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l()))); - - } - - @Disabled - @Test - public void testCSVUrlUnibi() throws IOException { - - URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"); - - BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream())); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class) - .build() - .parse() - .forEach(line -> - - { - try { - System.out.println(mapper.writeValueAsString(line)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } - - @Disabled - @Test - public void testCSVUrlDOAJ() throws IOException { - - URLConnection connection = new URL("https://doaj.org/csv").openConnection(); - connection - .setRequestProperty( - "User-Agent", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); - connection.connect(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8"))); - // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv")); - PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv"))); - String line = null; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - writer.close(); - in.close(); - in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv")); - ObjectMapper mapper = new ObjectMapper(); - - new CsvToBeanBuilder(in) - .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class) - .withMultilineLimit(1) - .build() - .parse() - .forEach(lline -> - - { - try { - System.out.println(mapper.writeValueAsString(lline)); - } catch (JsonProcessingException e) { - e.printStackTrace(); - } - } - - ); - } -} From dc8b05b39ef225c9898acc458b916331f239c25b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 10:18:25 +0200 Subject: [PATCH 100/166] Hosted By Map - changed the association with the datasource id for the hostedby element: there is no more the need to compute it. 
With the new HBM it is already the id in the graph --- .../main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala index 909e0f077..efd5d2497 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/DoiBoostMappingUtil.scala @@ -231,9 +231,7 @@ object DoiBoostMappingUtil { var hb = new KeyValue if (item != null) { hb.setValue(item.officialname) - hb.setKey(generateDSId(item.id)) - //TODO replace with the one above as soon as the new HBM will be used - //hb.setKey(item.id) + hb.setKey(item.id) if (item.openAccess) { i.setAccessright(getOpenAccessQualifier()) i.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) From 3359f73fcfb2ea24aec49681572ef8d606b22af6 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:00:42 +0200 Subject: [PATCH 101/166] cleanup & best practices --- .../dnetlib/dhp/common/collection/GetCSV.java | 51 +++++------ .../dhp/common/collection/GetCSVTest.java | 56 ++++++------ .../common/collection/models/CSVProject.java | 5 -- .../project/utils/model/CSVProject.java | 5 -- .../project/PrepareH2020ProgrammeTest.java | 2 +- .../oa/graph/hostedbymap/Aggregators.scala | 10 +-- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 85 +++++++++++++++++++ .../dhp/oa/graph/hostedbymap/GetCSV.java | 75 ---------------- .../SparkApplyHostedByMapToDatasource.scala | 2 +- .../SparkApplyHostedByMapToResult.scala | 4 +- .../SparkPrepareHostedByInfoToApply.scala | 6 +- .../hostedbymap/SparkProduceHostedByMap.scala | 2 +- .../oa/graph/hostedbymap/model/DOAJModel.java | 1 + .../graph/hostedbymap/model/EntityInfo.java | 61 +++++++------ .../hostedbymap/model/UnibiGoldModel.java | 21 +++-- .../graph/hostedbymap/oozie_app/workflow.xml | 6 +- .../dhp/oa/graph/hostedbymap/TestApply.scala | 4 +- .../oa/graph/hostedbymap/TestPrepare.scala | 14 +-- .../hostedbymap/iteminfofromhostedbymap.json | 40 ++++----- .../hostedbymap/iteminfofromhostedbymap2.json | 40 ++++----- .../graph/hostedbymap/iteminfofrompublication | 4 +- .../hostedbymap/iteminfofrompublication2 | 4 +- .../oa/graph/hostedbymap/preparedInfo.json | 2 +- .../oa/graph/hostedbymap/preparedInfo2.json | 6 +- .../graph/hostedbymap/unibi_transformed.json | 58 ++++++------- 25 files changed, 281 insertions(+), 283 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java delete mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java index 44f4121eb..44e19142c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java @@ -2,17 +2,9 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; -import java.net.URL; -import java.net.URLConnection; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.List; -import java.util.Optional; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -20,17 +12,19 @@ import org.apache.hadoop.fs.Path; import com.fasterxml.jackson.databind.ObjectMapper; import com.opencsv.bean.CsvToBeanBuilder; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; - public class GetCSV { - public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, - String modelClass) throws IOException, ClassNotFoundException { - getCsv(fileSystem, reader, hdfsPath, modelClass, ','); + public static final char DEFAULT_DELIMITER = ','; + private GetCSV() { } public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath, + String modelClass) throws IOException, ClassNotFoundException { + getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER); + } + + public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath, String modelClass, char delimiter) throws IOException, ClassNotFoundException { Path hdfsWritePath = new Path(hdfsPath); @@ -40,26 +34,23 @@ public class GetCSV { } fsDataOutputStream = fileSystem.create(hdfsWritePath); - try(BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))){ + try (BufferedWriter writer = new BufferedWriter( + new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) { - ObjectMapper mapper = new ObjectMapper(); + final ObjectMapper mapper = new ObjectMapper(); - new CsvToBeanBuilder(reader) - .withType(Class.forName(modelClass)) - .withSeparator(delimiter) - .build() - .parse() - .forEach(line -> { - try { - writer.write(mapper.writeValueAsString(line)); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + @SuppressWarnings("unchecked") + final List lines = new CsvToBeanBuilder(reader) + .withType(Class.forName(modelClass)) + .withSeparator(delimiter) + .build() + .parse(); + + for (Object line : lines) { + writer.write(mapper.writeValueAsString(line)); + writer.newLine(); + } } - - } } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java index d24083e0d..c45f83828 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.common.collection; import java.io.*; import java.nio.file.Files; -import jdk.nashorn.internal.ir.annotations.Ignore; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -20,6 +19,7 @@ import eu.dnetlib.dhp.common.collection.models.CSVProgramme; import eu.dnetlib.dhp.common.collection.models.CSVProject; import eu.dnetlib.dhp.common.collection.models.DOAJModel; import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; +import jdk.nashorn.internal.ir.annotations.Ignore; public class GetCSVTest { @@ -34,11 +34,11 @@ public class GetCSVTest { String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new 
HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); @@ -49,39 +49,39 @@ public class GetCSVTest { if (count == 0) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + .assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); Assertions.assertTrue(csvp.getShortTitle().equals("")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } if (count == 28) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + .assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); Assertions.assertTrue(csvp.getLanguage().equals("de")); } if (count == 229) { Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + .assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + .assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); Assertions.assertTrue(csvp.getLanguage().equals("en")); } Assertions.assertTrue(csvp.getCode() != null); @@ -225,14 +225,14 @@ public class GetCSVTest { } if (count == 7904) { System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("",doaj.getIssn()); + Assertions.assertEquals("", doaj.getIssn()); Assertions.assertEquals("2055-7159", doaj.getEissn()); Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); } if (count == 16707) { - Assertions.assertEquals("",doaj.getIssn()); - Assertions.assertEquals("2788-6298",doaj.getEissn()); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); Assertions .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); } diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java index 0d939ca0e..62c847907 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ 
b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java index 73cea0539..3ddb19636 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java @@ -19,7 +19,6 @@ public class CSVProject implements Serializable { @CsvBindByName(column = "topics") private String topics; - public String getId() { return id; } @@ -28,8 +27,6 @@ public class CSVProject implements Serializable { this.id = id; } - - public String getProgramme() { return programme; } @@ -46,6 +43,4 @@ public class CSVProject implements Serializable { this.topics = topics; } - - } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java index 68aacdc8b..680872126 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/PrepareH2020ProgrammeTest.java @@ -92,7 +92,7 @@ public class PrepareH2020ProgrammeTest { Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count()); - //tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); + // tmp.foreach(csvProgramme -> System.out.println(OBJECT_MAPPER.writeValueAsString(csvProgramme))); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala index 8198b197a..ce383292c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/Aggregators.scala @@ -78,11 +78,11 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ - b1.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + if(!b1.getHostedById.equals("")){ + b1.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) return b1 } - b2.setOpenaccess(b1.getOpenaccess || b2.getOpenaccess) + b2.setOpenAccess(b1.getOpenAccess || b2.getOpenAccess) b2 } @@ -116,7 +116,7 @@ object Aggregators { if(b2 == null){ return b1 } - if(!b1.getHb_id.equals("")){ + if(!b1.getHostedById.equals("")){ return b1 } b2 @@ -131,7 +131,7 @@ object Aggregators { def datasourceToSingleId(df:Dataset[EntityInfo]): Dataset[EntityInfo] = { val transformedData : Dataset[EntityInfo] = df - .groupByKey(_.getHb_id)(Encoders.STRING) + .groupByKey(_.getHostedById)(Encoders.STRING) 
.agg(Aggregators.datasourceToSingleIdAggregator) .map{ case (id:String , res: EntityInfo) => res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java new file mode 100644 index 000000000..c8329c99c --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -0,0 +1,85 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String workingPath = parser.get("workingPath"); + log.info("workingPath {}", workingPath); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + final HttpConnector2 connector2 = new HttpConnector2(); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + final Path path = new Path(workingPath + "/replaced.csv"); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (FSDataOutputStream fos = fileSystem.create(path, true)) { + String line; + while ((line = in.readLine()) != null) { + fos.writeUTF(line.replace("\\\"", "\"")); + fos.writeUTF("\n"); + } + } + } + + try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { + GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java deleted file mode 100644 index d7d369819..000000000 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java +++ /dev/null @@ -1,75 +0,0 @@ - -package eu.dnetlib.dhp.oa.graph.hostedbymap; - -import java.io.*; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; 
-import org.apache.hadoop.fs.FileSystem; - -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.common.collection.HttpConnector2; - -public class GetCSV { - - public static void main(final String[] args) throws Exception { - final ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - GetCSV.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json"))); - - parser.parseArgument(args); - - final String fileURL = parser.get("fileURL"); - final String hdfsPath = parser.get("workingPath"); - final String hdfsNameNode = parser.get("hdfsNameNode"); - final String classForName = parser.get("classForName"); - final String delimiter = Optional - .ofNullable(parser.get("delimiter")) - .orElse(null); - final Boolean shouldReplace = Optional - .ofNullable((parser.get("replace"))) - .map(Boolean::valueOf) - .orElse(false); - - char del = ';'; - if (delimiter != null) { - del = delimiter.charAt(0); - } - - HttpConnector2 connector2 = new HttpConnector2(); - - BufferedReader in = new BufferedReader( - new InputStreamReader(connector2.getInputSourceAsStream(fileURL))); - - if (Boolean.TRUE.equals(shouldReplace)) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/replaced.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - - in.close(); - in = new BufferedReader(new FileReader("/tmp/replaced.csv")); - } - - Configuration conf = new Configuration(); - conf.set("fs.defaultFS", hdfsNameNode); - - FileSystem fileSystem = FileSystem.get(conf); - - eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fileSystem, in, hdfsPath, classForName, del); - - in.close(); - if (Boolean.TRUE.equals(shouldReplace)) { - File f = new File("/tmp/DOAJ.csv"); - f.delete(); - } - - } - -} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index ae1454b47..424567830 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -15,7 +15,7 @@ import org.slf4j.{Logger, LoggerFactory} object SparkApplyHostedByMapToDatasource { def applyHBtoDats(join: Dataset[EntityInfo], dats: Dataset[Datasource]): Dataset[Datasource] = { - dats.joinWith(join, dats.col("id").equalTo(join.col("hb_id")), "left") + dats.joinWith(join, dats.col("id").equalTo(join.col("hostedById")), "left") .map(t2 => { val d: Datasource = t2._1 if (t2._2 != null) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 533e439b7..8b0b45513 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -26,9 +26,9 @@ object SparkApplyHostedByMapToResult { val i = p.getInstance().asScala if (i.size == 1) { val inst: Instance = i(0) - inst.getHostedby.setKey(ei.getHb_id) + 
inst.getHostedby.setKey(ei.getHostedById) inst.getHostedby.setValue(ei.getName) - if (ei.getOpenaccess) { + if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala index 6db0ad33d..b7a7d352f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkPrepareHostedByInfoToApply.scala @@ -64,13 +64,13 @@ object SparkPrepareHostedByInfoToApply { } def joinResHBM(res: Dataset[EntityInfo], hbm: Dataset[EntityInfo]): Dataset[EntityInfo] = { - Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journal_id").equalTo(hbm.col("journal_id")), "left") + Aggregators.resultToSingleId(res.joinWith(hbm, res.col("journalId").equalTo(hbm.col("journalId")), "left") .map(t2 => { val res: EntityInfo = t2._1 if(t2._2 != null ){ val ds = t2._2 - res.setHb_id(ds.getId) - res.setOpenaccess(ds.getOpenaccess) + res.setHostedById(ds.getId) + res.setOpenAccess(ds.getOpenAccess) res.setName(ds.getName) } res diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index de41ebf28..d0c603e29 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -111,7 +111,7 @@ object SparkProduceHostedByMap { def goldToHostedbyItemType(gold: UnibiGoldModel): HostedByItemType = { - return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssn_l, true) + return getHostedByItemType(Constants.UNIBI, gold.getTitle, gold.getIssn, "", gold.getIssnL, true) } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java index ba804b939..4b5dc22a6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/DOAJModel.java @@ -6,6 +6,7 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class DOAJModel implements Serializable { + @CsvBindByName(column = "Journal title") private String journalTitle; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java index 1facaa792..1c6c88fe7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/EntityInfo.java @@ -5,10 +5,25 @@ import java.io.Serializable; public class EntityInfo implements Serializable { private String id; - private String journal_id; + private String journalId; private String name; - private Boolean openaccess; - private String hb_id; + private Boolean openAccess; + private String hostedById; + + public static EntityInfo newInstance(String id, String journalId, String name) { + return newInstance(id, journalId, name, false); + + } + + public static EntityInfo newInstance(String id, String journalId, String name, Boolean openaccess) { + EntityInfo pi = new EntityInfo(); + pi.id = id; + pi.journalId = journalId; + pi.name = name; + pi.openAccess = openaccess; + pi.hostedById = ""; + return pi; + } public String getId() { return id; @@ -18,12 +33,12 @@ public class EntityInfo implements Serializable { this.id = id; } - public String getJournal_id() { - return journal_id; + public String getJournalId() { + return journalId; } - public void setJournal_id(String journal_id) { - this.journal_id = journal_id; + public void setJournalId(String journalId) { + this.journalId = journalId; } public String getName() { @@ -34,35 +49,19 @@ public class EntityInfo implements Serializable { this.name = name; } - public Boolean getOpenaccess() { - return openaccess; + public Boolean getOpenAccess() { + return openAccess; } - public void setOpenaccess(Boolean openaccess) { - this.openaccess = openaccess; + public void setOpenAccess(Boolean openAccess) { + this.openAccess = openAccess; } - public String getHb_id() { - return hb_id; + public String getHostedById() { + return hostedById; } - public void setHb_id(String hb_id) { - this.hb_id = hb_id; - } - - public static EntityInfo newInstance(String id, String j_id, String name) { - return newInstance(id, j_id, name, false); - - } - - public static EntityInfo newInstance(String id, String j_id, String name, Boolean openaccess) { - EntityInfo pi = new EntityInfo(); - pi.id = id; - pi.journal_id = j_id; - pi.name = name; - pi.openaccess = openaccess; - pi.hb_id = ""; - return pi; - + public void setHostedById(String hostedById) { + this.hostedById = hostedById; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java index 0927a136b..8f24cd615 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/model/UnibiGoldModel.java @@ -6,14 +6,15 @@ import java.io.Serializable; import com.opencsv.bean.CsvBindByName; public class UnibiGoldModel implements Serializable { + @CsvBindByName(column = "ISSN") private String issn; @CsvBindByName(column = "ISSN_L") - private String issn_l; + private String issnL; @CsvBindByName(column = "TITLE") private String title; @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; + private String titleSource; public String getIssn() { return issn; @@ -23,8 +24,12 @@ public class UnibiGoldModel implements Serializable { this.issn = issn; } - public String getIssn_l() { - return issn_l; + public String getIssnL() { + return issnL; + } + + public void setIssnL(String issnL) { + this.issnL = issnL; } public String getTitle() { @@ -35,11 +40,11 @@ public class UnibiGoldModel implements 
Serializable { this.title = title; } - public String getTitle_source() { - return title_source; + public String getTitleSource() { + return titleSource; } - public void setTitle_source(String title_source) { - this.title_source = title_source; + public void setTitleSource(String titleSource) { + this.titleSource = titleSource; } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 26b1d5993..d7b85b0cb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -100,10 +100,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold + --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -112,10 +113,11 @@ - eu.dnetlib.dhp.common.collection.GetCSV + eu.dnetlib.dhp.common.collection.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj + --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel --replacetrue diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala index a48e52702..1bdcb60aa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestApply.scala @@ -117,14 +117,14 @@ class TestApply extends java.io.Serializable{ val pb : Datasource = t2._1 val pa : Datasource = t2._2 assertTrue(t2._1.getId.equals(t2._2.getId)) - if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")){ + if(pb.getId.equals("10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d")) { assertTrue(pa.getOpenairecompatibility().getClassid.equals("hostedBy")) assertTrue(pa.getOpenairecompatibility().getClassname.equals("collected from a compatible aggregator")) assertTrue(pb.getOpenairecompatibility().getClassid.equals(ModelConstants.UNKNOWN)) - }else{ + } else { assertTrue(pa.getOpenairecompatibility().getClassid.equals(pb.getOpenairecompatibility.getClassid)) assertTrue(pa.getOpenairecompatibility().getClassname.equals(pb.getOpenairecompatibility.getClassname)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala index efd598fef..a3a753a8a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPrepare.scala @@ -75,8 +75,8 @@ class TestPrepare extends java.io.Serializable{ assertEquals(2, ds.count) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("1728-5852")).first().getId) - assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournal_id.equals("0001-396X")).first().getId) + 
assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("1728-5852")).first().getId) + assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ds.filter(ei => ei.getJournalId.equals("0001-396X")).first().getId) spark.close() } @@ -109,10 +109,10 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) - assertEquals("0001-396X", ei.getJournal_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) + assertEquals("0001-396X", ei.getJournalId) assertEquals("Academic Therapy", ei.getName) - assertTrue(!ei.getOpenaccess) + assertTrue(!ei.getOpenAccess) spark.close() } @@ -145,9 +145,9 @@ class TestPrepare extends java.io.Serializable{ val ei:EntityInfo = ds.first() assertEquals("50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9", ei.getId) - assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHb_id) + assertEquals("10|issn___print::e4b6d6d978f67520f6f37679a98c5735", ei.getHostedById) assertEquals("Academic Therapy", ei.getName) - assertTrue(ei.getOpenaccess) + assertTrue(ei.getOpenAccess) ds.foreach(e => println(mapper.writeValueAsString(e))) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json index dfb95816e..889daae47 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"1434-0860","name":"Forschung im Ingenieurwesen","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic 
Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"1434-0860","name":"Forschung im Ingenieurwesen","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} 
+{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json index 3d2f2e005..b84f4eb86 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofromhostedbymap2.json @@ -1,20 +1,20 @@ -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"0001-396X","name":"Academic Therapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journal_id":"0033-569X","name":"Quarterly of Applied Mathematics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journal_id":"0015-7899","name":"Forschung im Ingenieurwesen","openaccess":false,"hb_id":""} -{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journal_id":"0034-6691","name":"Review of Polarography","openaccess":false,"hb_id":""} -{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journal_id":"0035-1776","name":"Revue de Synthèse","openaccess":false,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"0022-0116","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journal_id":"0037-699X","name":"Slovenské divadlo","openaccess":true,"hb_id":""} -{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journal_id":"1573-3564","name":"Journal of Contemporary Psychotherapy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journal_id":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openaccess":false,"hb_id":""} -{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journal_id":"0022-0493","name":"Journal of Economic Entomology","openaccess":false,"hb_id":""} -{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journal_id":"0045-6713","name":"Children s Literature in Education","openaccess":false,"hb_id":""} -{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journal_id":"0022-4715","name":"Journal of 
Statistical Physics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journal_id":"0047-4800","name":"Littérature","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"1543-8325","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journal_id":"0068-1024","name":"Brigham Young University science bulletin","openaccess":false,"hb_id":""} -{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journal_id":"0023-7639","name":"Land Economics","openaccess":false,"hb_id":""} -{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journal_id":"0083-6656","name":"Vistas in Astronomy","openaccess":false,"hb_id":""} -{"id":"10|issn___print::91899e3872351895467856daeb798f63","journal_id":"0033-3298","name":"Public Administration","openaccess":false,"hb_id":""} -{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journal_id":"0090-502X","name":"Memory & Cognition","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"0001-396X","name":"Academic Therapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::cb21aba7985b1a0350abf99ee537302d","journalId":"0033-569X","name":"Quarterly of Applied Mathematics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::4b5605a395a243e12c95c1ecb8365107","journalId":"0015-7899","name":"Forschung im Ingenieurwesen","openAccess":false,"hostedById":""} +{"id":"10|issn___print::7977c16f0c47a3827536c7af137f6a81","journalId":"0034-6691","name":"Review of Polarography","openAccess":false,"hostedById":""} +{"id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a10bce72f7ee20cae8fffc1a167d112f","journalId":"0035-1776","name":"Revue de Synthèse","openAccess":false,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"0022-0116","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|doajarticles::4f8b4cf7460320c0a80b6c6b64b3260f","journalId":"0037-699X","name":"Slovenské divadlo","openAccess":true,"hostedById":""} +{"id":"10|issn___print::a4e08f7b862090b3f07e574e0159ff70","journalId":"1573-3564","name":"Journal of Contemporary Psychotherapy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::3c7f60a71f15ecc1611fbfe07509cd5c","journalId":"0037-9697","name":"Proceedings of the Society for Analytical Chemistry","openAccess":false,"hostedById":""} +{"id":"10|issn___print::853ec7c7322ab252e0eca4d2840e7bd0","journalId":"0022-0493","name":"Journal of Economic Entomology","openAccess":false,"hostedById":""} +{"id":"10|issn___print::2a494a747066cafd64816e7495f32dc5","journalId":"0045-6713","name":"Children s Literature in Education","openAccess":false,"hostedById":""} +{"id":"10|issn___print::745f001e3f564f56a493dfea1faae501","journalId":"0022-4715","name":"Journal of Statistical Physics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::dcde40f2d085cdf9c3a5b109d4978a9c","journalId":"0047-4800","name":"Littérature","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"1543-8325","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::480cbec18c06afa9bb7e0070948c97ff","journalId":"0068-1024","name":"Brigham Young University science 
bulletin","openAccess":false,"hostedById":""} +{"id":"10|issn___print::1aea1dc1fbc3153111099750884dc4e8","journalId":"0023-7639","name":"Land Economics","openAccess":false,"hostedById":""} +{"id":"10|issn___print::8cc8a1c0f0e11d4117014af5eccbbbb7","journalId":"0083-6656","name":"Vistas in Astronomy","openAccess":false,"hostedById":""} +{"id":"10|issn___print::91899e3872351895467856daeb798f63","journalId":"0033-3298","name":"Public Administration","openAccess":false,"hostedById":""} +{"id":"10|issn___print::55bb9eafabc7c310adb8bb0c336f2c26","journalId":"0090-502X","name":"Memory & Cognition","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication index 6ee4f94be..e67c05cd0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1728-5852","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1728-5852","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 index 59bd32d4b..e11311d0a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/iteminfofrompublication2 @@ -1,2 +1,2 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"","openaccess":false,"hb_id":""} -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"0001-396X","name":"","openaccess":false,"hb_id":""} \ No newline at end of file +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"","openAccess":false,"hostedById":""} +{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"0001-396X","name":"","openAccess":false,"hostedById":""} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json index d9c201082..e037c1858 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo.json @@ -1 +1 @@ -{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journal_id":"1434-0860","name":"Academic Therapy","openaccess":true,"hb_id":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file 
+{"id":"50|4dc99724cf04::ed1ba83e1add6ce292433729acd8b0d9","journalId":"1434-0860","name":"Academic Therapy","openAccess":true,"hostedById":"10|issn___print::e4b6d6d978f67520f6f37679a98c5735"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json index 64e2433ed..846be762a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/preparedInfo2.json @@ -1,3 +1,3 @@ -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} -{"id":"pubid","journal_id":"issn","name":"ds_name","openaccess":true,"hb_id":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::0ab37b7620eb9a73ac95d3ca4320c97d"} +{"id":"pubid","journalId":"issn","name":"ds_name","openAccess":true,"hostedById":"10|doajarticles::abbc9265bea9ff62776a1c39785af00c"} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json index d4acba4a9..c4ac62ff5 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/unibi_transformed.json @@ -1,29 +1,29 @@ -{"issn":"2502-731X","issn_l":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","title_source":"ROAD"} -{"issn":"2502-7409","issn_l":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","title_source":"ROAD"} -{"issn":"2502-7433","issn_l":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","title_source":"ROAD"} -{"issn":"2502-745X","issn_l":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","title_source":"ROAD"} -{"issn":"2502-7549","issn_l":"2502-7549","title":"ELang journal (An English Education journal)","title_source":"ROAD"} -{"issn":"2423-3633","issn_l":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","title_source":"ROAD"} -{"issn":"2423-5563","issn_l":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","title_source":"ROAD"} -{"issn":"1735-434X","issn_l":"1735-434X","title":"Iranian journal of animal biosystematics.","title_source":"ROAD"} -{"issn":"2423-4435","issn_l":"2008-6113","title":"Majallah-i jangal-i Īrān.","title_source":"ROAD"} -{"issn":"2423-4575","issn_l":"2423-4575","title":"Ābziyān-i zinatī.","title_source":"ROAD"} -{"issn":"2423-4974","issn_l":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","title_source":"ROAD"} -{"issn":"2380-0607","issn_l":"2380-0607","title":"AIHM journal club.","title_source":"ROAD"} -{"issn":"1085-4568","issn_l":"1085-4568","title":"Frontiers.","title_source":"ROAD"} -{"issn":"2380-8845","issn_l":"2380-8845","title":"˜The œjournal of contemporary archival 
studies.","title_source":"ROAD"} -{"issn":"2381-1803","issn_l":"2381-1803","title":"International journal of complementary & alternative medicine.","title_source":"ROAD"} -{"issn":"2381-2478","issn_l":"2381-2478","title":"Palapala.","title_source":"ROAD"} -{"issn":"2382-5170","issn_l":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","title_source":"ROAD"} -{"issn":"2382-9737","issn_l":"2382-9737","title":"Majallah-i salāmat va bihdāsht","title_source":"ROAD"} -{"issn":"2382-977X","issn_l":"2382-977X","title":"UCT journal of research in science ,engineering and technology","title_source":"ROAD"} -{"issn":"2382-9974","issn_l":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","title_source":"ROAD"} -{"issn":"2227-4782","issn_l":"2227-4782","title":"Problemi endokrinnoï patologìï.","title_source":"ROAD"} -{"issn":"2685-0079","issn_l":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","title_source":"ROAD"} -{"issn":"2574-0075","issn_l":"2574-0075","title":"Hypermedia magazine.","title_source":"ROAD"} -{"issn":"2574-0296","issn_l":"2574-0296","title":"˜The œmuseum review.","title_source":"ROAD"} -{"issn":"2574-0334","issn_l":"2574-0334","title":"Bioactive compounds in health and disease.","title_source":"ROAD"} -{"issn":"2574-108X","issn_l":"2574-108X","title":"Journal of computer science integration.","title_source":"ROAD"} -{"issn":"2574-254X","issn_l":"2574-254X","title":"Child and adolescent obesity.","title_source":"ROAD"} -{"issn":"2574-3325","issn_l":"2574-3325","title":"Journal of research on the college president.","title_source":"ROAD"} -{"issn":"2239-6101","issn_l":"2239-5938","title":"European journal of sustainable development.","title_source":"ROAD"} \ No newline at end of file +{"issn":"2502-731X","issnL":"2502-731X","title":"JIMKESMAS (Jurnal Ilmiah Mahasiswa Kesehatan Masyarakat)","titleSource":"ROAD"} +{"issn":"2502-7409","issnL":"1411-0253","title":"Jurnal ilmu informasi, perpustakaan, dan kearsipan","titleSource":"ROAD"} +{"issn":"2502-7433","issnL":"2502-7433","title":"At-Tadbir : jurnal ilmiah manajemen","titleSource":"ROAD"} +{"issn":"2502-745X","issnL":"2502-745X","title":"Jurnal Kesehatan Panrita Husada.","titleSource":"ROAD"} +{"issn":"2502-7549","issnL":"2502-7549","title":"ELang journal (An English Education journal)","titleSource":"ROAD"} +{"issn":"2423-3633","issnL":"2423-3625","title":"̒Ulūm-i darmāngāhī-i dāmpizishkī-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-5563","issnL":"2423-3773","title":"Pizhūhishnāmah-i ̒ilm/sanjī.","titleSource":"ROAD"} +{"issn":"1735-434X","issnL":"1735-434X","title":"Iranian journal of animal biosystematics.","titleSource":"ROAD"} +{"issn":"2423-4435","issnL":"2008-6113","title":"Majallah-i jangal-i Īrān.","titleSource":"ROAD"} +{"issn":"2423-4575","issnL":"2423-4575","title":"Ābziyān-i zinatī.","titleSource":"ROAD"} +{"issn":"2423-4974","issnL":"2423-4974","title":"Pizhūhishnāmah-i ravābiṭ-i biyn/al- milal.","titleSource":"ROAD"} +{"issn":"2380-0607","issnL":"2380-0607","title":"AIHM journal club.","titleSource":"ROAD"} +{"issn":"1085-4568","issnL":"1085-4568","title":"Frontiers.","titleSource":"ROAD"} +{"issn":"2380-8845","issnL":"2380-8845","title":"˜The œjournal of contemporary archival studies.","titleSource":"ROAD"} +{"issn":"2381-1803","issnL":"2381-1803","title":"International journal of complementary & alternative medicine.","titleSource":"ROAD"} 
+{"issn":"2381-2478","issnL":"2381-2478","title":"Palapala.","titleSource":"ROAD"} +{"issn":"2382-5170","issnL":"2382-5170","title":"Asia pacific journal of environment ecology and sustainable development.","titleSource":"ROAD"} +{"issn":"2382-9737","issnL":"2382-9737","title":"Majallah-i salāmat va bihdāsht","titleSource":"ROAD"} +{"issn":"2382-977X","issnL":"2382-977X","title":"UCT journal of research in science ,engineering and technology","titleSource":"ROAD"} +{"issn":"2382-9974","issnL":"2382-9974","title":"Bih/nizhādī-i giyāhān-i zirā̒ī va bāghī.","titleSource":"ROAD"} +{"issn":"2227-4782","issnL":"2227-4782","title":"Problemi endokrinnoï patologìï.","titleSource":"ROAD"} +{"issn":"2685-0079","issnL":"2597-4971","title":"Jurnal Kebijakan Pembangunan Daerah : Jurnal Penelitian dan Pengembangan Kebijakan Pembangunan Daerah.","titleSource":"ROAD"} +{"issn":"2574-0075","issnL":"2574-0075","title":"Hypermedia magazine.","titleSource":"ROAD"} +{"issn":"2574-0296","issnL":"2574-0296","title":"˜The œmuseum review.","titleSource":"ROAD"} +{"issn":"2574-0334","issnL":"2574-0334","title":"Bioactive compounds in health and disease.","titleSource":"ROAD"} +{"issn":"2574-108X","issnL":"2574-108X","title":"Journal of computer science integration.","titleSource":"ROAD"} +{"issn":"2574-254X","issnL":"2574-254X","title":"Child and adolescent obesity.","titleSource":"ROAD"} +{"issn":"2574-3325","issnL":"2574-3325","title":"Journal of research on the college president.","titleSource":"ROAD"} +{"issn":"2239-6101","issnL":"2239-5938","title":"European journal of sustainable development.","titleSource":"ROAD"} \ No newline at end of file From baed5e3337def46cef839a4010c2b4715d8e3f81 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:14:47 +0200 Subject: [PATCH 102/166] test classes moved in specific components --- .../dhp/common/collection/GetCSVTest.java | 246 ------------------ .../collection/models/CSVProgramme.java | 80 ------ .../common/collection/models/CSVProject.java | 46 ---- .../common/collection/models/DOAJModel.java | 52 ---- .../collection/models/UnibiGoldModel.java | 45 ---- .../project/DownloadCsvTest.java | 145 +++++++++++ .../oa/graph/hostedbymap/DownloadCsvTest.java | 139 ++++++++++ 7 files changed, 284 insertions(+), 469 deletions(-) delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java delete mode 100644 dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java deleted file mode 100644 index c45f83828..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java +++ /dev/null @@ -1,246 +0,0 @@ - -package eu.dnetlib.dhp.common.collection; - -import java.io.*; -import java.nio.file.Files; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; 
-import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.common.collection.models.CSVProgramme; -import eu.dnetlib.dhp.common.collection.models.CSVProject; -import eu.dnetlib.dhp.common.collection.models.DOAJModel; -import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel; -import jdk.nashorn.internal.ir.annotations.Ignore; - -public class GetCSVTest { - - private static String workingDir; - - private static LocalFileSystem fs; - - @Disabled - @Test - void getProgrammeFileTest() throws Exception { - - String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); - if (count == 0) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); - Assertions - .assertTrue( - csvp - .getTitle() - .startsWith( - "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); - Assertions - .assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); - Assertions.assertTrue(csvp.getShortTitle().equals("")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - if (count == 28) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); - Assertions - .assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); - Assertions.assertTrue(csvp.getLanguage().equals("de")); - } - if (count == 229) { - Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); - Assertions - .assertTrue( - csvp - .getTitle() - .equals( - "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); - Assertions - .assertTrue( - csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); - Assertions.assertTrue(csvp.getLanguage().equals("en")); - } - Assertions.assertTrue(csvp.getCode() != null); - Assertions.assertTrue(csvp.getCode().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(767, count); - } - - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(GetCSVTest.class.getSimpleName()) - .toString(); - - fs = FileSystem.getLocal(new Configuration()); - } - - @Disabled - @Test - void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { - String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; - // String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv"; - - GetCSV - .getCsv( - fs, - new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) - // new 
BufferedReader(new FileReader(fileURL)) - , workingDir + "/projects", - "eu.dnetlib.dhp.common.collection.models.CSVProject", ';'); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); - if (count == 0) { - Assertions.assertTrue(csvp.getId().equals("771736")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); - Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG")); - - } - if (count == 22882) { - Assertions.assertTrue(csvp.getId().equals("752903")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); - Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); - } - if (count == 223023) { - Assertions.assertTrue(csvp.getId().equals("861952")); - Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); - Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); - } - Assertions.assertTrue(csvp.getId() != null); - Assertions.assertTrue(csvp.getProgramme().startsWith("H2020")); - count += 1; - } - - Assertions.assertEquals(34957, count); - } - - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - - GetCSV - .getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - Assertions.assertTrue(unibi.getIssn().equals("0001-625X")); - Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X")); - Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica")); - - } - if (count == 43158) { - Assertions.assertTrue(unibi.getIssn().equals("2088-6330")); - Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330")); - Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - - } - if (count == 67027) { - Assertions.assertTrue(unibi.getIssn().equals("2658-7068")); - Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488")); - Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } - - count += 1; - } - - Assertions.assertEquals(67028, count); - } - - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - - String fileURL = "https://doaj.org/csv"; - - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } - - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - "eu.dnetlib.dhp.common.collection.models.DOAJModel", ','); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); - - String line; - int count = 
0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); - - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { - - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } - - count += 1; - } - - Assertions.assertEquals(16713, count); - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java deleted file mode 100644 index d17fcc75e..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java +++ /dev/null @@ -1,80 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; -import com.opencsv.bean.CsvIgnore; - -/** - * The model for the programme csv file - */ -public class CSVProgramme implements Serializable { - - @CsvBindByName(column = "code") - private String code; - - @CsvBindByName(column = "title") - private String title; - - @CsvBindByName(column = "shortTitle") - private String shortTitle; - - @CsvBindByName(column = "language") - private String language; - - @CsvIgnore - private String classification; - - @CsvIgnore - private String classification_short; - - public String getClassification_short() { - return classification_short; - } - - public void setClassification_short(String classification_short) { - this.classification_short = classification_short; - } - - public String getClassification() { - return classification; - } - - public void setClassification(String classification) { - this.classification = classification; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getShortTitle() { - return shortTitle; - } - - public void setShortTitle(String shortTitle) { - this.shortTitle = shortTitle; - } - - public String getLanguage() { - return language; - } - - public void setLanguage(String language) { - this.language = language; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java deleted file mode 100644 index 62c847907..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java +++ /dev/null @@ -1,46 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -/** - * the mmodel for the projects csv file - */ -public class CSVProject implements Serializable { - - @CsvBindByName(column = "id") - private String id; - - @CsvBindByName(column = "programme") - private String programme; - - @CsvBindByName(column = 
"topics") - private String topics; - - public String getId() { - return id; - } - - public void setId(String id) { - this.id = id; - } - - public String getProgramme() { - return programme; - } - - public void setProgramme(String programme) { - this.programme = programme; - } - - public String getTopics() { - return topics; - } - - public void setTopics(String topics) { - this.topics = topics; - } - -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java deleted file mode 100644 index 156ba3632..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java +++ /dev/null @@ -1,52 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class DOAJModel implements Serializable { - @CsvBindByName(column = "Journal title") - private String journalTitle; - - @CsvBindByName(column = "Journal ISSN (print version)") - private String issn; - - @CsvBindByName(column = "Journal EISSN (online version)") - private String eissn; - - @CsvBindByName(column = "Review process") - private String reviewProcess; - - public String getJournalTitle() { - return journalTitle; - } - - public void setJournalTitle(String journalTitle) { - this.journalTitle = journalTitle; - } - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getEissn() { - return eissn; - } - - public void setEissn(String eissn) { - this.eissn = eissn; - } - - public String getReviewProcess() { - return reviewProcess; - } - - public void setReviewProcess(String reviewProcess) { - this.reviewProcess = reviewProcess; - } -} diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java deleted file mode 100644 index ae47262bf..000000000 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java +++ /dev/null @@ -1,45 +0,0 @@ - -package eu.dnetlib.dhp.common.collection.models; - -import java.io.Serializable; - -import com.opencsv.bean.CsvBindByName; - -public class UnibiGoldModel implements Serializable { - @CsvBindByName(column = "ISSN") - private String issn; - @CsvBindByName(column = "ISSN_L") - private String issn_l; - @CsvBindByName(column = "TITLE") - private String title; - @CsvBindByName(column = "TITLE_SOURCE") - private String title_source; - - public String getIssn() { - return issn; - } - - public void setIssn(String issn) { - this.issn = issn; - } - - public String getIssn_l() { - return issn_l; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getTitle_source() { - return title_source; - } - - public void setTitle_source(String title_source) { - this.title_source = title_source; - } -} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java new file mode 100644 index 000000000..700b9e632 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/DownloadCsvTest.java @@ -0,0 +1,145 @@ +package eu.dnetlib.dhp.actionmanager.project; + +import com.fasterxml.jackson.databind.ObjectMapper; 
+import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme; +import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getProgrammeFileTest() throws Exception { + + String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + CSVProgramme.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class); + if (count == 0) { + assertTrue(csvp.getCode().equals("H2020-EU.5.f.")); + assertTrue( + csvp + .getTitle() + .startsWith( + "Develop the governance for the advancement of responsible research and innovation by all stakeholders")); + assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation")); + assertTrue(csvp.getShortTitle().equals("")); + assertTrue(csvp.getLanguage().equals("en")); + } + if (count == 28) { + assertTrue(csvp.getCode().equals("H2020-EU.3.5.4.")); + assertTrue( + csvp + .getTitle() + .equals( + "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation")); + assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation")); + assertTrue(csvp.getLanguage().equals("de")); + } + if (count == 229) { + assertTrue(csvp.getCode().equals("H2020-EU.3.2.")); + assertTrue( + csvp + .getTitle() + .equals( + "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy")); + assertTrue( + csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy")); + assertTrue(csvp.getLanguage().equals("en")); + } + assertTrue(csvp.getCode() != null); + assertTrue(csvp.getCode().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(767, count); + } + + @Disabled + @Test + void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException { + String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv"; + + GetCSV + .getCsv( + fs, + new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))) + , workingDir + "/projects", + CSVProject.class.getName(), ';'); + + BufferedReader in = new BufferedReader(new 
InputStreamReader(fs.open(new Path(workingDir + "/projects")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class); + if (count == 0) { + assertTrue(csvp.getId().equals("771736")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.1.")); + assertTrue(csvp.getTopics().equals("ERC-2017-COG")); + + } + if (count == 22882) { + assertTrue(csvp.getId().equals("752903")); + assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2.")); + assertTrue(csvp.getTopics().equals("MSCA-IF-2016")); + } + if (count == 223023) { + assertTrue(csvp.getId().equals("861952")); + assertTrue(csvp.getProgramme().equals("H2020-EU.4.e.")); + assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019")); + } + assertTrue(csvp.getId() != null); + assertTrue(csvp.getProgramme().startsWith("H2020")); + count += 1; + } + + Assertions.assertEquals(34957, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java new file mode 100644 index 000000000..b93320e1a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -0,0 +1,139 @@ +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.*; + +import java.io.*; +import java.nio.file.Files; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DownloadCsvTest { + + private static String workingDir; + + private static LocalFileSystem fs; + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); + + fs = FileSystem.getLocal(new Configuration()); + } + + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + + GetCSV + .getCsv( + fs, new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), + workingDir + "/programme", + UnibiGoldModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); + + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + 
assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } + + count += 1; + } + + Assertions.assertEquals(67028, count); + } + + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + + String fileURL = "https://doaj.org/csv"; + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + GetCSV + .getCsv( + fs, new BufferedReader( + new FileReader("/tmp/DOAJ_1.csv")), + workingDir + "/programme", + DOAJModel.class.getName()); + + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + Assertions.assertEquals("0001-3765", doaj.getIssn()); + Assertions.assertEquals("1678-2690", doaj.getEissn()); + Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + + } + if (count == 7904) { + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2055-7159", doaj.getEissn()); + Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { + + Assertions.assertEquals("", doaj.getIssn()); + Assertions.assertEquals("2788-6298", doaj.getEissn()); + Assertions + .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); + } + + count += 1; + } + + Assertions.assertEquals(16713, count); + } + + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } + +} From c3ad4ab701af352805b776a23ee9db7c426adab9 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:23:15 +0200 Subject: [PATCH 103/166] minor fixes --- .../oa/graph/hostedbymap/SparkProduceHostedByMap.scala | 4 ++-- .../oa/graph/hostedbymap/download_csv_parameters.json | 9 +++------ .../dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 6 ++---- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala index d0c603e29..1ee1d5d1a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkProduceHostedByMap.scala @@ -209,8 +209,8 @@ object SparkProduceHostedByMap { Aggregators.explodeHostedByItemType(oaHostedByDataset(spark, datasourcePath) - .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold")) - .union(doajHostedByDataset(spark, workingDirPath + "/doaj")) + .union(goldHostedByDataset(spark, workingDirPath + "/unibi_gold.json")) + .union(doajHostedByDataset(spark, workingDirPath + "/doaj.json")) .flatMap(hbi => 
toList(hbi))).filter(hbi => hbi._2.id.startsWith("10|")) .map(hbi => toHostedByMap(hbi))(Encoders.STRING) .rdd.saveAsTextFile(outputPath , classOf[GzipCodec]) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index fba048343..cf417c675 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -1,13 +1,10 @@ - [ - { "paramName":"fu", "paramLongName":"fileURL", "paramDescription": "the url to download the csv file ", "paramRequired": true }, - { "paramName":"wp", "paramLongName":"workingPath", @@ -27,9 +24,9 @@ "paramRequired": true }, { - "paramName": "sr", - "paramLongName": "replace", - "paramDescription": "true if the input file has to be cleaned before parsing", + "paramName": "d", + "paramLongName": "delimiter", + "paramDescription": "csv delimiter character", "paramRequired": false } ] diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index d7b85b0cb..870b4ba0f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -78,7 +78,6 @@ - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -92,7 +91,6 @@ - @@ -100,7 +98,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${unibiFileURL} --workingPath${workingDir}/unibi_gold @@ -113,7 +111,7 @@ - eu.dnetlib.dhp.common.collection.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV --hdfsNameNode${nameNode} --fileURL${doajFileURL} --workingPath${workingDir}/doaj From 7ee2757fcd97d2d842638f4e52290df8217f465b Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:41:01 +0200 Subject: [PATCH 104/166] fixed DownloadCSV parameters spec; workflow patching the hostedby replaces the graph content (publication, datasource) rather than creating a copy --- .../SparkApplyHostedByMapToDatasource.scala | 13 ++++++++----- .../hostedbymap/SparkApplyHostedByMapToResult.scala | 7 +++++-- .../graph/hostedbymap/download_csv_parameters.json | 6 ++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala index 424567830..1b18ba3ae 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToDatasource.scala @@ -27,8 +27,8 @@ object SparkApplyHostedByMapToDatasource { d })(Encoders.bean((classOf[Datasource]))) } - def main(args: Array[String]): Unit = { + def main(args: Array[String]): Unit = { val logger: Logger = LoggerFactory.getLogger(getClass) val conf: 
SparkConf = new SparkConf() @@ -41,18 +41,15 @@ object SparkApplyHostedByMapToDatasource { .appName(getClass.getSimpleName) .master(parser.get("master")).getOrCreate() - val graphPath = parser.get("graphPath") - val outputPath = parser.get("outputPath") val preparedInfoPath = parser.get("preparedInfoPath") - implicit val formats = DefaultFormats - implicit val mapEncoderPubs: Encoder[Datasource] = Encoders.bean(classOf[Datasource]) implicit val mapEncoderEinfo: Encoder[EntityInfo] = Encoders.bean(classOf[EntityInfo]) + val mapper = new ObjectMapper() val dats : Dataset[Datasource] = spark.read.textFile(graphPath + "/datasource") @@ -62,6 +59,12 @@ object SparkApplyHostedByMapToDatasource { .map(ei => mapper.readValue(ei, classOf[EntityInfo]))) applyHBtoDats(pinfo, dats).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) + + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/datasource") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index 8b0b45513..db7ab4ac0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -75,8 +75,11 @@ object SparkApplyHostedByMapToResult { applyHBtoPubs(pinfo, pubs).write.mode(SaveMode.Overwrite).option("compression","gzip").json(outputPath) - - + spark.read.textFile(outputPath) + .write + .mode(SaveMode.Overwrite) + .option("compression","gzip") + .text(graphPath + "/publication") } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index cf417c675..22c65eb35 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -11,6 +11,12 @@ "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", "paramRequired": true }, + { + "paramName":"of", + "paramLongName":"outputFile", + "paramDescription": "the output json file produced by the CSV downlaod procedure", + "paramRequired": true + }, { "paramName": "hnn", "paramLongName": "hdfsNameNode", From 17cefe6a97edd8434ce806af5064ce1b766adedd Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 12:43:59 +0200 Subject: [PATCH 105/166] [HBM] removed stale replace option --- .../eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index 870b4ba0f..a03f4e36a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -117,7 +117,6 @@ 
--workingPath${workingDir}/doaj --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel - --replacetrue From 5f0903d50d6abff7ae5ae98a71b53d0045d62ae8 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 14:17:54 +0200 Subject: [PATCH 106/166] fixed CSV downloader & tests --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 26 ++++++++---- .../oa/graph/hostedbymap/DownloadCsvTest.java | 42 ++++++++----------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index c8329c99c..be35d31f8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.oa.graph.hostedbymap; import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; +import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -56,30 +59,37 @@ public class DownloadCSV { .orElse(DEFAULT_DELIMITER); log.info("delimiter {}", delimiter); - final HttpConnector2 connector2 = new HttpConnector2(); - Configuration conf = new Configuration(); conf.set("fs.defaultFS", hdfsNameNode); FileSystem fileSystem = FileSystem.get(conf); + + new DownloadCSV().doDownload(fileURL, workingPath, outputFile, classForName, delimiter, fileSystem); + + } + + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { + + final HttpConnector2 connector2 = new HttpConnector2(); + final Path path = new Path(workingPath + "/replaced.csv"); try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (FSDataOutputStream fos = fileSystem.create(path, true)) { + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { String line; while ((line = in.readLine()) != null) { - fos.writeUTF(line.replace("\\\"", "\"")); - fos.writeUTF("\n"); + writer.write(line.replace("\\\"", "\"")); + writer.newLine(); } } } - try (InputStreamReader reader = new InputStreamReader(fileSystem.open(path))) { - GetCSV.getCsv(fileSystem, reader, outputFile, classForName, delimiter); + try (InputStreamReader reader = new InputStreamReader(fs.open(path))) { + GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter); } - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index b93320e1a..7b02025fb 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -39,14 +39,16 @@ public class DownloadCsvTest { String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - GetCSV - 
.getCsv( - fs, new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))), - workingDir + "/programme", - UnibiGoldModel.class.getName()); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; @@ -82,24 +84,16 @@ public class DownloadCsvTest { String fileURL = "https://doaj.org/csv"; - try (BufferedReader in = new BufferedReader( - new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) { - try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) { - String line; - while ((line = in.readLine()) != null) { - writer.println(line.replace("\\\"", "\"")); - } - } - } + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV().doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - GetCSV - .getCsv( - fs, new BufferedReader( - new FileReader("/tmp/DOAJ_1.csv")), - workingDir + "/programme", - DOAJModel.class.getName()); - - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme")))); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); String line; int count = 0; From f74adc475275b32abb4a66a6ae3844fd566f4bff Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 15:52:15 +0200 Subject: [PATCH 107/166] added DownloadCSV2 as alternative implementation of the same download procedure --- .../dhp/oa/graph/hostedbymap/DownloadCSV.java | 14 +- .../oa/graph/hostedbymap/DownloadCSV2.java | 84 +++++++ .../hostedbymap/download_csv_parameters.json | 6 +- .../graph/hostedbymap/oozie_app/workflow.xml | 13 +- .../oa/graph/hostedbymap/DownloadCsvTest.java | 210 ++++++++++-------- 5 files changed, 214 insertions(+), 113 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java index be35d31f8..dff761c34 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV.java @@ -7,16 +7,15 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; import java.util.Optional; -import eu.dnetlib.dhp.common.collection.CollectorException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.GetCSV; import eu.dnetlib.dhp.common.collection.HttpConnector2; @@ -68,8 +67,9 @@ public class DownloadCSV { } - protected void doDownload(String fileURL, String workingPath, String outputFile, 
String classForName, char delimiter, FileSystem fs) - throws IOException, ClassNotFoundException, CollectorException { + protected void doDownload(String fileURL, String workingPath, String outputFile, String classForName, + char delimiter, FileSystem fs) + throws IOException, ClassNotFoundException, CollectorException { final HttpConnector2 connector2 = new HttpConnector2(); @@ -78,11 +78,11 @@ public class DownloadCSV { try (BufferedReader in = new BufferedReader( new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { - try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), Charset.defaultCharset()))) { + try (PrintWriter writer = new PrintWriter( + new OutputStreamWriter(fs.create(path, true), StandardCharsets.UTF_8))) { String line; while ((line = in.readLine()) != null) { - writer.write(line.replace("\\\"", "\"")); - writer.newLine(); + writer.println(line.replace("\\\"", "\"")); } } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java new file mode 100644 index 000000000..d82d00862 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCSV2.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp.oa.graph.hostedbymap; + +import java.io.*; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.GetCSV; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class DownloadCSV2 { + + private static final Logger log = LoggerFactory.getLogger(DownloadCSV2.class); + + public static final char DEFAULT_DELIMITER = ';'; + + public static void main(final String[] args) throws Exception { + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + DownloadCSV2.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json")))); + + parser.parseArgument(args); + + final String fileURL = parser.get("fileURL"); + log.info("fileURL {}", fileURL); + + final String tmpFile = parser.get("tmpFile"); + log.info("tmpFile {}", tmpFile); + + final String outputFile = parser.get("outputFile"); + log.info("outputFile {}", outputFile); + + final String hdfsNameNode = parser.get("hdfsNameNode"); + log.info("hdfsNameNode {}", hdfsNameNode); + + final String classForName = parser.get("classForName"); + log.info("classForName {}", classForName); + + final char delimiter = Optional + .ofNullable(parser.get("delimiter")) + .map(s -> s.charAt(0)) + .orElse(DEFAULT_DELIMITER); + log.info("delimiter {}", delimiter); + + HttpConnector2 connector2 = new HttpConnector2(); + + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connector2.getInputSourceAsStream(fileURL)))) { + + try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(tmpFile)))) { + String line; + while ((line = in.readLine()) != null) { + writer.println(line.replace("\\\"", "\"")); + } + } + } + + try (BufferedReader in = new BufferedReader(new FileReader(tmpFile))) { + Configuration conf = new Configuration(); + 
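// point the Hadoop client at the configured namenode, then hand the cleaned local CSV to GetCSV,
// which converts it to JSON records at outputFile on HDFS; the temporary local file is removed in the finally block below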
conf.set("fs.defaultFS", hdfsNameNode); + + FileSystem fileSystem = FileSystem.get(conf); + + GetCSV.getCsv(fileSystem, in, outputFile, classForName, delimiter); + } finally { + FileUtils.deleteQuietly(new File(tmpFile)); + } + + } + +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json index 22c65eb35..50fbb00f0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/download_csv_parameters.json @@ -6,9 +6,9 @@ "paramRequired": true }, { - "paramName":"wp", - "paramLongName":"workingPath", - "paramDescription": "the path where to find the pre-processed data for unibi gold list and doj artciles", + "paramName":"tf", + "paramLongName":"tmpFile", + "paramDescription": "the temporary local file storing the cleaned CSV contents for unibi gold list and doj artciles", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml index a03f4e36a..84035fe4e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml @@ -73,7 +73,8 @@ - ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'ProduceHBM'} + ${wf:conf('resumeFrom') eq 'download_csv'} @@ -98,10 +99,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${unibiFileURL} - --workingPath${workingDir}/unibi_gold + --tmpFile/tmp/unibi_gold_replaced.csv --outputFile${workingDir}/unibi_gold.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel @@ -111,10 +112,10 @@ - eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV + eu.dnetlib.dhp.oa.graph.hostedbymap.DownloadCSV2 --hdfsNameNode${nameNode} --fileURL${doajFileURL} - --workingPath${workingDir}/doaj + --tmpFile/tmp/doaj_replaced.csv --outputFile${workingDir}/doaj.json --classForNameeu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel @@ -141,7 +142,7 @@ --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} --datasourcePath${sourcePath}/datasource - --workingPath${workingDir} + --workingPath/user/${wf:user()}/data --outputPath${hostedByMapPath} --masteryarn-cluster diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java index 7b02025fb..edf74fc6a 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/DownloadCsvTest.java @@ -1,133 +1,149 @@ + package eu.dnetlib.dhp.oa.graph.hostedbymap; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.GetCSV; -import eu.dnetlib.dhp.common.collection.HttpConnector2; -import 
eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; -import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; + import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; -import java.nio.file.Files; +import com.fasterxml.jackson.databind.ObjectMapper; -import static org.junit.jupiter.api.Assertions.assertTrue; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel; +import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel; public class DownloadCsvTest { - private static String workingDir; + private static final Logger log = LoggerFactory.getLogger(DownloadCsvTest.class); - private static LocalFileSystem fs; + private static String workingDir; - @BeforeAll - public static void beforeAll() throws IOException { - workingDir = Files - .createTempDirectory(DownloadCsvTest.class.getSimpleName()) - .toString(); + private static LocalFileSystem fs; - fs = FileSystem.getLocal(new Configuration()); - } + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(DownloadCsvTest.class.getSimpleName()) + .toString(); - @Disabled - @Test - void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { + fs = FileSystem.getLocal(new Configuration()); + } - String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; + @Disabled + @Test + void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/unibi_gold.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/unibi_gold", - outputFile, - UnibiGoldModel.class.getName(), - ',', - fs); + String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/unibi_gold.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/unibi_gold", + outputFile, + UnibiGoldModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class); - if (count == 0) { - assertTrue(unibi.getIssn().equals("0001-625X")); - assertTrue(unibi.getIssnL().equals("0001-625X")); - assertTrue(unibi.getTitle().equals("Acta Mycologica")); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 43158) { - assertTrue(unibi.getIssn().equals("2088-6330")); - assertTrue(unibi.getIssnL().equals("2088-6330")); - assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); + String line; + int count = 0; + while ((line = in.readLine()) != null) { + UnibiGoldModel unibi = 
new ObjectMapper().readValue(line, UnibiGoldModel.class); + if (count == 0) { + assertTrue(unibi.getIssn().equals("0001-625X")); + assertTrue(unibi.getIssnL().equals("0001-625X")); + assertTrue(unibi.getTitle().equals("Acta Mycologica")); - } - if (count == 67027) { - assertTrue(unibi.getIssn().equals("2658-7068")); - assertTrue(unibi.getIssnL().equals("2308-2488")); - assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); - } + } + if (count == 43158) { + assertTrue(unibi.getIssn().equals("2088-6330")); + assertTrue(unibi.getIssnL().equals("2088-6330")); + assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama")); - count += 1; - } + } + if (count == 67027) { + assertTrue(unibi.getIssn().equals("2658-7068")); + assertTrue(unibi.getIssnL().equals("2308-2488")); + assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ.")); + } - Assertions.assertEquals(67028, count); - } + count += 1; + } - @Disabled - @Test - void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { + assertEquals(67028, count); + } - String fileURL = "https://doaj.org/csv"; + @Disabled + @Test + void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException { - final String outputFile = workingDir + "/doaj.json"; - new DownloadCSV().doDownload( - fileURL, - workingDir + "/doaj", - outputFile, - DOAJModel.class.getName(), - ',', - fs); + String fileURL = "https://doaj.org/csv"; - BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); + final String outputFile = workingDir + "/doaj.json"; + new DownloadCSV() + .doDownload( + fileURL, + workingDir + "/doaj", + outputFile, + DOAJModel.class.getName(), + ',', + fs); - String line; - int count = 0; - while ((line = in.readLine()) != null) { - DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); - if (count == 0) { - Assertions.assertEquals("0001-3765", doaj.getIssn()); - Assertions.assertEquals("1678-2690", doaj.getEissn()); - Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputFile)))); - } - if (count == 7904) { - System.out.println(new ObjectMapper().writeValueAsString(doaj)); - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2055-7159", doaj.getEissn()); - Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle()); - } - if (count == 16707) { + String line; + int count = 0; + while ((line = in.readLine()) != null) { + DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class); + if (count == 0) { + assertEquals("0001-3765", doaj.getIssn()); + assertEquals("1678-2690", doaj.getEissn()); + assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle()); + } + if (count == 22) { + log.info(new ObjectMapper().writeValueAsString(doaj)); + System.out.println(new ObjectMapper().writeValueAsString(doaj)); + } + if (count == 7904) { + // log.info(new ObjectMapper().writeValueAsString(doaj)); + assertEquals("", doaj.getIssn()); + assertEquals("2055-7159", doaj.getEissn()); + assertEquals("BJR|case reports", doaj.getJournalTitle()); + } + if (count == 16707) { - Assertions.assertEquals("", doaj.getIssn()); - Assertions.assertEquals("2788-6298", doaj.getEissn()); - Assertions - .assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle()); - } + assertEquals("2783-1043", doaj.getIssn()); + 
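// expected eISSN and (non-Latin) journal title for DOAJ row 16707; like the total record count asserted
// further down, these values reflect the DOAJ CSV snapshot available when the test was written and will drift as DOAJ updates its export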
assertEquals("2783-1051", doaj.getEissn()); + assertEquals("فیزیک کاربردی ایران", doaj.getJournalTitle()); + } - count += 1; - } + count += 1; + } - Assertions.assertEquals(16713, count); - } + assertEquals(16715, count); + } - @AfterAll - public static void cleanup() { - FileUtils.deleteQuietly(new File(workingDir)); - } + @AfterAll + public static void cleanup() { + FileUtils.deleteQuietly(new File(workingDir)); + } } From 7743d0f9193e2fc9837f7e101ed25e398296b8c3 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 16:14:54 +0200 Subject: [PATCH 108/166] consolidated dnet wf profiles into the same submodule --- .../dedup}/dataset_dedup_configuration.xml | 0 ...herresearchproduct_dedup_configuration.xml | 0 .../publication_dedup_configuration.xml | 0 .../result_deduplication_orchestrator.xml | 0 .../dedup}/software_dedup_configuration.xml | 0 .../dnetlib/dhp/provision/02_beta_graph.xml | 25 + dhp-workflows/dhp-workflow-profiles/pom.xml | 27 - .../wf/profiles/graph_beta_construction.xml | 779 ------------------ .../wf/profiles/graph_prod_construction.xml | 705 ---------------- .../dhp/wf/profiles/graph_to_hiveDB.xml | 73 -- .../dnetlib/dhp/wf/profiles/update_solr.xml | 98 --- .../dnetlib/dhp/wf/profiles/update_stats.xml | 74 -- 12 files changed, 25 insertions(+), 1756 deletions(-) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles => dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup}/software_dedup_configuration.xml (100%) delete mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml delete mode 100644 dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git 
a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/publication_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/software_dedup_configuration.xml rename to dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 766783f8b..344e99044 100644 --- a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -660,6 +660,31 @@ build-report + + + + + + updates publication's hostedby info according to the ISSNs available from DOAJ and UNIBI + + executeOozieJob + IIS + + { + 'sourcePath' : 'cleanedFirstGraphPath' + } + + + { + 'resumeFrom' : 'produceHBM', + 'hostedByMapPath' : '/user/dnet.beta/data', + 'oozie.wf.application.path' : '/lib/dnet/BETA/oa/graph/hostedbymap/oozie_app', + 'workingDir' : '/tmp/beta_provision/working_dir/hostedbymap', + 'outputPath' : '/tmp/beta_provision/working_dir/hostedbymap' + } + + build-report + diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml deleted file mode 100644 index 54e76c1e2..000000000 --- a/dhp-workflows/dhp-workflow-profiles/pom.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - dhp-workflows - eu.dnetlib.dhp - 1.2.4-SNAPSHOT - - 4.0.0 - - dhp-workflow-profiles - jar - - - - \ No newline at end of file diff --git 
a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml deleted file mode 100644 index 08ed24cd0..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_beta_construction.xml +++ /dev/null @@ -1,779 +0,0 @@ - -
- - - - - -
- - Graph Construction [HYBRID] - Data Provision - 30 - - - - reuse cached content from the PROD aggregation system - - reuseProdContent - true - - - - - - - - set the PROD aggregator content path - - prodContentPath - /tmp/core_aggregator - - - - - - - - Set the path containing the PROD AGGREGATOR graph - - prodAggregatorGraphPath - /tmp/core_provision/graph/00_prod_graph_aggregator - - - - - - - - reuse cached content from the BETA aggregation system - - reuseBetaContent - true - - - - - - - - set the BETA aggregator content path - - betaContentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the BETA AGGREGATOR graph - - betaAggregatorGraphPath - /tmp/core_provision/graph/00_beta_graph_aggregator - - - - - - - - Set the IS lookup service address - - isLookUpUrl - http://services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the target path to store the MERGED graph - - mergedGraphPath - /tmp/core_provision/graph/01_graph_merged - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/core_provision/graph/02_graph_raw - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/core_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/core_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/core_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/core_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/core_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/core_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/core_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/core_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/core_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/core_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/core_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/core_provision/graph/14_graph_blacklisted - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'betaAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseBetaContent', - 'contentPath' : 'betaContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/beta_aggregator' - } - - build-report - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'prodAggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseProdContent', - 'contentPath' : 'prodContentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://postgresql.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/core_provision/working_dir/prod_aggregator' - } - - build-report - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'betaInputGgraphPath' : 'betaAggregatorGraphPath', - 'prodInputGgraphPath' : 'prodAggregatorGraphPath', - 'graphOutputPath' : 'mergedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/merge/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/merge_graph' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'mergedGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 
'workingDir' : '/tmp/core_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'rawGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/core_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 
'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/funding', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/core_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/core_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/core_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml deleted file mode 100644 index 4eab12c73..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_prod_construction.xml +++ /dev/null @@ -1,705 +0,0 @@ - -
- - - - - -
- - Graph Construction [PROD] - Data Provision - 30 - - - - reuse cached content from the aggregation system - - reuseContent - true - - - - - - - - set the aggregator content path - - contentPath - /tmp/beta_aggregator - - - - - - - - Set the path containing the AGGREGATOR graph - - aggregatorGraphPath - /tmp/beta_provision/graph/00_graph_aggregator - - - - - - - - Set the target path to store the RAW graph - - rawGraphPath - /tmp/beta_provision/graph/01_graph_raw - - - - - - - - Set the target path to store the first CLEANED graph - - firstCleanedGraphPath - /tmp/prod_provision/graph/02_graph_first_cleaned - - - - - - - - Set the target path to store the DEDUPED graph - - dedupGraphPath - /tmp/beta_provision/graph/03_graph_dedup - - - - - - - - Set the target path to store the INFERRED graph - - inferredGraphPath - /tmp/beta_provision/graph/04_graph_inferred - - - - - - - - Set the target path to store the CONSISTENCY graph - - consistentGraphPath - /tmp/beta_provision/graph/05_graph_consistent - - - - - - - - Set the target path to store the ORCID enriched graph - - orcidGraphPath - /tmp/beta_provision/graph/06_graph_orcid - - - - - - - - Set the target path to store the BULK TAGGED graph - - bulkTaggingGraphPath - /tmp/beta_provision/graph/07_graph_bulktagging - - - - - - - - Set the target path to store the AFFILIATION from INSTITUTIONAL REPOS graph - - affiliationGraphPath - /tmp/beta_provision/graph/08_graph_affiliation - - - - - - - - Set the target path to store the COMMUNITY from SELECTED SOURCES graph - - communityOrganizationGraphPath - /tmp/beta_provision/graph/09_graph_comunity_organization - - - - - - - - Set the target path to store the FUNDING from SEMANTIC RELATION graph - - fundingGraphPath - /tmp/beta_provision/graph/10_graph_funding - - - - - - - - Set the target path to store the COMMUNITY from SEMANTIC RELATION graph - - communitySemRelGraphPath - /tmp/beta_provision/graph/11_graph_comunity_sem_rel - - - - - - - - Set the target path to store the COUNTRY enriched graph - - countryGraphPath - /tmp/beta_provision/graph/12_graph_country - - - - - - - - Set the target path to store the CLEANED graph - - cleanedGraphPath - /tmp/beta_provision/graph/13_graph_cleaned - - - - - - - - Set the target path to store the blacklisted graph - - blacklistedGraphPath - /tmp/beta_provision/graph/14_graph_blacklisted - - - - - - - - Set the lookup address - - isLookUpUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - Set the map of paths for the Bulk Tagging - - bulkTaggingPathMap - {"author" : "$['author'][*]['fullname']", "title" : "$['title'][*]['value']", "orcid" : "$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']", "contributor" : "$['contributor'][*]['value']", "description" : "$['description'][*]['value']"} - - - - - - - - Set the map of associations organization, community list for the propagation of community to result through organization - - propagationOrganizationCommunityMap - {"20|corda__h2020::3fb05a9524c3f790391261347852f638":["mes","euromarine"], "20|corda__h2020::e8dbe14cca9bf6fce09d468872f813f8":["mes","euromarine"], 
"20|snsf________::9b253f265e3bef5cae6d881fdf61aceb":["mes","euromarine"],"20|rcuk________::e054eea0a47665af8c3656b5785ccf76":["mes","euromarine"],"20|corda__h2020::edc18d67c9b11fb616ca9f6e1db1b151":["mes","euromarine"],"20|rcuk________::d5736d9da90521ddcdc7828a05a85e9a":["mes","euromarine"],"20|corda__h2020::f5d418d3aa1cf817ddefcc3fdc039f27":["mes","euromarine"],"20|snsf________::8fa091f8f25a846779acb4ea97b50aef":["mes","euromarine"],"20|corda__h2020::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|corda_______::81e020977211c2c40fae2e1a50bffd71":["mes","euromarine"],"20|snsf________::31d0a100e54e3cdb3c6f52d91e638c78":["mes","euromarine"],"20|corda__h2020::ea379ef91b8cc86f9ac5edc4169292db":["mes","euromarine"],"20|corda__h2020::f75ee2ee48e5cb0ec8c8d30aaa8fef70":["mes","euromarine"],"20|rcuk________::e16010089551a1a9182a94604fc0ea59":["mes","euromarine"],"20|corda__h2020::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|corda_______::38531a2cce7c5c347ffc439b07c1f43b":["mes","euromarine"],"20|grid________::b2cbbf5eadbbf87d534b022bad3191d7":["mes","euromarine"],"20|snsf________::74730ef1439d7f7636a8be58a6b471b8":["mes","euromarine"],"20|nsf_________::ad72e19043a5a467e35f9b444d11563e":["mes","euromarine"],"20|rcuk________::0fc3e92500290902a2d38ec2445e74c3":["mes","euromarine"],"20|grid________::ad2c29905da0eb3c06b3fa80cacd89ea":["mes","euromarine"],"20|corda__h2020::30b53e4d63d3724f00acb9cbaca40860":["mes","euromarine"],"20|corda__h2020::f60f84bee14ad93f0db0e49af1d5c317":["mes","euromarine"], "20|corda__h2020::7bf251ac3765b5e89d82270a1763d09f":["mes","euromarine"], "20|corda__h2020::65531bd11be9935948c7f2f4db1c1832":["mes","euromarine"], "20|corda__h2020::e0e98f86bbc76638bbb72a8fe2302946":["mes","euromarine"], "20|snsf________::3eb43582ac27601459a8d8b3e195724b":["mes","euromarine"], "20|corda__h2020::af2481dab65d06c8ea0ae02b5517b9b6":["mes","euromarine"], "20|corda__h2020::c19d05cfde69a50d3ebc89bd0ee49929":["mes","euromarine"], "20|corda__h2020::af0bfd9fc09f80d9488f56d71a9832f0":["mes","euromarine"], "20|rcuk________::f33c02afb0dc66c49d0ed97ca5dd5cb0":["beopen"], - "20|grid________::a867f78acdc5041b34acfe4f9a349157":["beopen"], "20|grid________::7bb116a1a9f95ab812bf9d2dea2be1ff":["beopen"], "20|corda__h2020::6ab0e0739dbe625b99a2ae45842164ad":["beopen"], "20|corda__h2020::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda_______::8ba50792bc5f4d51d79fca47d860c602":["beopen"], "20|corda__h2020::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::e70e9114979e963eef24666657b807c3":["beopen"], "20|corda_______::15911e01e9744d57205825d77c218737":["beopen"], "20|opendoar____::056a41e24e2a9a67215e87bbee6a80ab":["beopen"], "20|opendoar____::7f67f2e6c6fbb0628f8160fcd3d92ae3":["beopen"], "20|grid________::a8ecfd7c084e561168bcbe6bf0daf3e3":["beopen"], "20|corda_______::7bbe6cc5d8ec1864739a04b0d020c9e9":["beopen"], "20|corda_______::3ff558e30c2e434d688539548300b050":["beopen"], "20|corda__h2020::5ffee5b3b83b33a8cf0e046877bd3a39":["beopen"], "20|corda__h2020::5187217e2e806a6df3579c46f82401bc":["beopen"], "20|grid________::5fa7e2709bcd945e26bfa18689adeec1":["beopen"], "20|corda_______::d8696683c53027438031a96ad27c3c07":["beopen"], "20|corda__h2020::d8696683c53027438031a96ad27c3c07":["beopen"], "20|rcuk________::23a79ebdfa59790864e4a485881568c1":["beopen"], "20|corda__h2020::b76cf8fe49590a966953c37e18608af9":["beopen"], "20|grid________::d2f0204126ee709244a488a4cd3b91c2":["beopen"], "20|corda__h2020::05aba9d2ed17533d15221e5655ac11e6":["beopen"], 
"20|grid________::802401579481dc32062bdee69f5e6a34":["beopen"], "20|corda__h2020::3f6d9d54cac975a517ba6b252c81582d":["beopen"]} - - - - - - - - - Set the dedup orchestrator name - - dedupConfig - decisiontree-dedup-test - - - - - - - - declares the ActionSet ids to promote in the RAW graph - - actionSetIdsRawGraph - scholexplorer-dump,gridac-dump,doiboost-organizations,doiboost,orcidworks-no-doi,iis-wos-entities,iis-entities-software,iis-entities-patent - - - - - - - - declares the ActionSet ids to promote in the INFERRED graph - - actionSetIdsIISGraph - iis-researchinitiative,iis-document-citations,iis-document-affiliation,iis-document-classes,iis-document-similarities,iis-referenced-datasets-main,iis-referenced-datasets-preprocessing,iis-referenced-projects-main,iis-referenced-projects-preprocessing,iis-referenceextraction-pdb,document_software_url,iis-extracted-metadata,iis-communities,iis-referenced-patents,iis-covid-19 - - - - - - - - wait configurations - - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'graphOutputPath' : 'aggregatorGraphPath', - 'isLookupUrl' : 'isLookUpUrl', - 'reuseContent' : 'reuseContent', - 'contentPath' : 'contentPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/raw_all/oozie_app', - 'mongoURL' : 'mongodb://beta.services.openaire.eu', - 'mongoDb' : 'mdstore', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '', - 'workingDir' : '/tmp/beta_provision/working_dir/aggregator' - } - - build-report - - - - - - - - create the RAW graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsRawGraph', - 'inputGraphRootPath' : 'aggregatorGraphPath', - 'outputGraphRootPath' : 'rawGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsRaw' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'rawGraphPath', - 'graphOutputPath': 'firstCleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/first_clean' - } - - build-report - - - - - - - - search for duplicates in the raw graph - - executeOozieJob - IIS - - { - 'actionSetId' : 'dedupConfig', - 'graphBasePath' : 'firstCleanedGraphPath', - 'dedupGraphPath': 'dedupGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/scan/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - create the INFERRED graph - - executeOozieJob - IIS - - { - 'inputActionSetIds' : 'actionSetIdsIISGraph', - 'inputGraphRootPath' : 
'dedupGraphPath', - 'outputGraphRootPath' : 'inferredGraphPath', - 'isLookupUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/actionmanager/wf/main/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'activePromoteDatasetActionPayload' : 'true', - 'activePromoteDatasourceActionPayload' : 'true', - 'activePromoteOrganizationActionPayload' : 'true', - 'activePromoteOtherResearchProductActionPayload' : 'true', - 'activePromoteProjectActionPayload' : 'true', - 'activePromotePublicationActionPayload' : 'true', - 'activePromoteRelationActionPayload' : 'true', - 'activePromoteResultActionPayload' : 'true', - 'activePromoteSoftwareActionPayload' : 'true', - 'mergeAndGetStrategy' : 'MERGE_FROM_AND_GET', - 'workingDir' : '/tmp/beta_provision/working_dir/promoteActionsIIS' - } - - build-report - - - - - - - - mark duplicates as deleted and redistribute the relationships - - executeOozieJob - IIS - - { - 'graphBasePath' : 'inferredGraphPath', - 'dedupGraphPath': 'consistentGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/dedup/consistency/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/dedup' - } - - build-report - - - - - - - - propagates ORCID among results linked by allowedsemrels semantic relationships - - executeOozieJob - IIS - - { - 'sourcePath' : 'consistentGraphPath', - 'outputPath': 'orcidGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/orcid', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - mark results respecting some rules as belonging to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'orcidGraphPath', - 'outputPath': 'bulkTaggingGraphPath', - 'isLookUpUrl' : 'isLookUpUrl', - 'pathMap' : 'bulkTaggingPathMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/bulktag/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/bulktag' - } - - build-report - - - - - - - - creates relashionships between results and organizations when the organizations are associated to institutional repositories - - executeOozieJob - IIS - - { - 'sourcePath' : 'bulkTaggingGraphPath', - 'outputPath': 'affiliationGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/affiliation/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/affiliation', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - marks as belonging to communities the result collected from datasources related to the organizations specified in the organizationCommunityMap - - executeOozieJob - IIS - - { - 'sourcePath' : 'affiliationGraphPath', - 'outputPath': 'communityOrganizationGraphPath', - 'organizationtoresultcommunitymap': 'propagationOrganizationCommunityMap' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_organization/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_organization', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - created relation between projects and results linked to other results trough allowedsemrel semantic relations linked to projects - - executeOozieJob - IIS - - { - 'sourcePath' : 'communityOrganizationGraphPath', - 'outputPath': 'fundingGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/funding/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/funding', - 
'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - tag as belonging to communitites result in in allowedsemrels relation with other result already linked to communities - - executeOozieJob - IIS - - { - 'sourcePath' : 'fundingGraphPath', - 'outputPath': 'communitySemRelGraphPath', - 'isLookUpUrl' : 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/community_semrel/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/community_semrel', - 'allowedsemrels' : 'isSupplementedBy;isSupplementTo', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - associated to results colleced from allowedtypes and those in the whithelist the country of the organization(s) handling the datasource it is collected from - - executeOozieJob - IIS - - { - 'sourcePath' : 'communitySemRelGraphPath', - 'outputPath': 'countryGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/country/oozie_app', - 'sparkExecutorCores' : '3', - 'sparkExecutorMemory' : '10G', - 'workingDir' : '/tmp/beta_provision/working_dir/country', - 'allowedtypes' : 'pubsrepository::institutional', - 'whitelist' : '10|opendoar____::300891a62162b960cf02ce3827bb363c', - 'saveGraph' : 'true' - } - - build-report - - - - - - - - clean the properties in the graph typed as Qualifier according to the vocabulary indicated in schemeid - - executeOozieJob - IIS - - { - 'graphInputPath' : 'countryGraphPath', - 'graphOutputPath': 'cleanedGraphPath', - 'isLookupUrl': 'isLookUpUrl' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/clean/oozie_app', - 'workingPath' : '/tmp/beta_provision/working_dir/clean' - } - - build-report - - - - - - - - removes blacklisted relations - - executeOozieJob - IIS - - { - 'sourcePath' : 'cleanedGraphPath', - 'outputPath': 'blacklistedGraphPath' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/enrichment/blacklist/oozie_app', - 'workingDir' : '/tmp/beta_provision/working_dir/blacklist', - 'postgresURL' : 'jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus', - 'postgresUser' : 'dnet', - 'postgresPassword' : '' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
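For illustration only (none of this belongs to the patches above): every node of the provision profile above resolves to an executeOozieJob call that submits the referenced oozie_app under /lib/dnet with the listed properties. A minimal Java sketch, assuming a reachable Oozie endpoint, of the equivalent submission for the orcid-propagation step with the plain Oozie client; the endpoint, the HDFS application-path prefix and the source/output graph paths are assumptions, and the cluster-wide properties the meta-workflow normally injects (nameNode, jobTracker, queueName, Spark memory) are omitted for brevity.

import java.util.Properties;

import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;

// Illustrative sketch only: submits the orcid-propagation oozie_app with the same
// property names the profile's executeOozieJob node populates. Endpoint and paths
// are placeholders, not values taken from the profile.
public class SubmitOrcidPropagationStep {

	public static void main(String[] args) throws OozieClientException {
		final OozieClient client = new OozieClient("http://oozie.example.org:11000/oozie"); // assumed endpoint

		final Properties conf = client.createConfiguration(); // pre-populates user.name
		conf.setProperty(OozieClient.APP_PATH,
			"hdfs://nameservice1/lib/dnet/oa/enrichment/orcidtoresultfromsemrel/oozie_app"); // app path named in the profile
		conf.setProperty("sourcePath", "/tmp/beta_provision/graph/consistent"); // assumed graph location
		conf.setProperty("outputPath", "/tmp/beta_provision/graph/orcid");      // assumed graph location
		conf.setProperty("workingDir", "/tmp/beta_provision/working_dir/orcid");
		conf.setProperty("allowedsemrels", "isSupplementedBy;isSupplementTo");
		conf.setProperty("saveGraph", "true");

		final String jobId = client.run(conf);
		// right after submission the job is typically still in PREP or RUNNING state
		System.out.println("submitted " + jobId + " - " + client.getJobInfo(jobId).getStatus());
	}
}

The profile itself stays declarative: only the property mapping lives in the XML, while the Spark jobs doing the actual work are packaged in the referenced oozie_app.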
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml deleted file mode 100644 index 0ace12ea3..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/graph_to_hiveDB.xml +++ /dev/null @@ -1,73 +0,0 @@ - -
- - - - - -
- - Graph to HiveDB [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputPath - - - - - - - - Set the target path to store the RAW graph - - hiveDbName - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputPath' : 'inputPath', - 'hiveDbName' : 'hiveDbName' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/hive/oozie_app' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
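For illustration only: the Graph to HiveDB profile above drives /lib/dnet/oa/graph/hive/oozie_app with just two properties, the HDFS inputPath of the graph and the hiveDbName it is materialized into. A minimal sketch, assuming a reachable HiveServer2 endpoint and a database name such as openaire_beta, of how the imported database could be inspected afterwards; endpoint, credentials and the expected table names are assumptions.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Illustrative sketch only: list the tables produced by the hive import oozie_app.
// Endpoint, credentials and database name are placeholders.
public class InspectGraphHiveDb {

	public static void main(String[] args) throws Exception {
		Class.forName("org.apache.hive.jdbc.HiveDriver"); // requires hive-jdbc on the classpath
		final String url = "jdbc:hive2://hive.example.org:10000/openaire_beta"; // assumed HiveServer2 + hiveDbName

		try (Connection con = DriverManager.getConnection(url, "dnet", "");
			Statement stmt = con.createStatement();
			ResultSet rs = stmt.executeQuery("SHOW TABLES")) {
			while (rs.next()) {
				// one table per graph entity/relation type is expected, e.g. publication, dataset, relation
				System.out.println(rs.getString(1));
			}
		}
	}
}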
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml deleted file mode 100644 index 8a7738bcf..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_solr.xml +++ /dev/null @@ -1,98 +0,0 @@ - -
- - - - - -
- - Update Solr [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - inputGraphRootPath - - - - - - - - Set the target path to store the RAW graph - - format - TMF - - - - - - - Set the lookup address - - isLookupUrl - http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'inputGraphRootPath' : 'inputGraphRootPath', - 'isLookupUrl' : 'isLookupUrl', - 'format' : 'format' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/provision/oozie_app', - 'maxRelations' : '100', - 'relPartitions' : '3000', - 'batchSize' : '2000', - 'relationFilter' : 'isAuthorInstitutionOf,produces,hasAmongTopNSimilarDocuments', - 'otherDsTypeId' : 'scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource', - 'resumeFrom' : 'prepare_relations', - 'sparkDriverMemoryForJoining' : '3G', - 'sparkExecutorMemoryForJoining' : '7G', - 'sparkExecutorCoresForJoining' : '4', - 'sparkDriverMemoryForIndexing' : '2G', - 'sparkExecutorMemoryForIndexing' : '2G', - 'sparkExecutorCoresForIndexing' : '64', - 'sparkNetworkTimeout' : '600', - 'workingDir' : '/tmp/beta_provision/working_dir/update_solr' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
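For illustration only: the Update Solr profile above feeds the provision oozie_app that joins the graph into XML records and indexes them, with the 'format' parameter (here TMF) selecting the target index. A minimal SolrJ sketch, assuming a ZooKeeper ensemble and a collection named after the format, of how the resulting index could be sanity-checked; the hosts and the collection naming convention are assumptions.

import java.util.Collections;
import java.util.Optional;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

// Illustrative sketch only: count the records indexed by the provision oozie_app.
// ZooKeeper hosts and the collection name are placeholders.
public class CountIndexedRecords {

	public static void main(String[] args) throws Exception {
		try (CloudSolrClient client = new CloudSolrClient.Builder(
			Collections.singletonList("zk1.example.org:2181"), Optional.empty()).build()) {

			client.setDefaultCollection("TMF-index-openaire"); // assumed naming, derived from the 'format' parameter
			final QueryResponse rsp = client.query(new SolrQuery("*:*").setRows(0));
			System.out.println("indexed records: " + rsp.getResults().getNumFound());
		}
	}
}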
\ No newline at end of file diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml deleted file mode 100644 index a91b6302e..000000000 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/wf/profiles/update_stats.xml +++ /dev/null @@ -1,74 +0,0 @@ - -
- - - - - -
- - Update Stats [OCEAN] - Data Provision - 30 - - - Set the path containing the AGGREGATOR graph - - openaire_db_name - - - - - - - - Set the target path to store the RAW graph - - stats_db_name - - - - - - - - - wait configurations - - - - - - - create the AGGREGATOR graph - - executeOozieJob - IIS - - { - 'openaire_db_name' : 'openaire_db_name', - 'stats_db_name' : 'stats_db_name' - } - - - { - 'oozie.wf.application.path' : '/lib/dnet/oa/graph/stats/oozie_app', - 'hive_timeout' : '3000' - } - - build-report - - - - - - - - - wf_20200615_163630_609 - 2020-06-15T17:08:00+00:00 - SUCCESS - - - -
\ No newline at end of file From ed7e28490a2cf0c08a7b67240b28bfebe6f290cc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:19:01 +0200 Subject: [PATCH 109/166] change in sh --- .../dhp/doiboost/crossref_dump_reader/oozie_app/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh index 2ce704283..bb7ec0b45 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/download.sh @@ -1,2 +1,2 @@ #!/bin/bash -curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put $2/$3 \ No newline at end of file +curl -LSs -H "Crossref-Plus-API-Token: Bearer $4" $1 | hdfs dfs -put - $2/$3 \ No newline at end of file From 6fec71e8d2bb2d27b73d83cc273bd20b2465ceb6 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 13 Aug 2021 16:39:02 +0200 Subject: [PATCH 110/166] removed the specific of the infra we are running the wf from the wf name --- .../eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml | 2 +- .../eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml index 3700ce5d9..ab3b9593e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/preprocess/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index 831ff5a57..f5596b60e 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + sparkDriverMemory From bc7068106c9b5ae1737efcd2556da267f326541a Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:19:44 +0200 Subject: [PATCH 111/166] added crossref download oozie workflow --- .../oozie_app/config-default.xml | 42 ++++++ .../oozie_app/workflow.xml | 128 ++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml new file mode 100644 index 000000000..508202e30 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/config-default.xml @@ -0,0 +1,42 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + 
oozie.action.sharelib.for.spark + spark2 + + + oozie.launcher.mapreduce.user.classpath.first + true + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index e69de29bb..6e4f17912 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -0,0 +1,128 @@ + + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + crossrefdumpfilename + the Crossref input path + + + crossrefDumpPath + the Crossref dump path + + + crossrefdumptoken + the token for the API dump path + + + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + ${jobTracker} + ${nameNode} + + + mapred.job.queue.name + ${queueName} + + + download.sh + ${url} + ${crossrefDumpPath} + ${crossrefdumpfilename} + ${crossrefdumptoken} + HADOOP_USER_NAME=${wf:user()} + download.sh + + + + + + + + + ${jobTracker} + ${nameNode} + eu.dnetlib.doiboost.crossref.ExtractCrossrefRecords + --hdfsServerUri${nameNode} + --crossrefFileNameTarGz${crossrefdumpfilename} + --workingPath${crossrefDumpPath} + --outputPath${crossrefDumpPath}/files/ + + + + + + + + yarn-cluster + cluster + SparkUnpackCrossrefEntries + eu.dnetlib.doiboost.crossref.UnpackCrtossrefEntries + dhp-doiboost-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.sql.shuffle.partitions=3840 + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --masteryarn-cluster + --sourcePath${crossrefDumpPath}/files + --targetPath${crossrefDumpPath}/crossref_unpack/ + + + + + + + + + + \ No newline at end of file From 82086f3422955934561a65ef5d8b8526ffbbfce4 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:42:14 +0200 Subject: [PATCH 112/166] fixed directory name --- .../eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml | 0 .../resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml | 0 .../dnetlib/dhp/actionmanager/actionset_h2020_classification.xml | 0 .../eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml | 0 .../main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml | 0 .../eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml | 0 
.../dhp/dedup/otherresearchproduct_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml | 0 .../eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml | 0 .../eu/dnetlib/dhp/dedup/software_dedup_configuration.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml | 0 .../resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml | 0 .../main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml | 0 .../src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml (100%) rename 
dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml (100%) rename dhp-workflows/{dhp-worfklow-profiles => dhp-workflow-profiles}/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml (100%) diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_bipFinderScores.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_datacite.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_doiboost.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_h2020_classification.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_orcidworks-no-doi.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml rename to 
dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/actionmanager/actionset_ror.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/dataset_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/otherresearchproduct_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/publication_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/result_deduplication_orchestrator.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/dedup/software_dedup_configuration.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml rename to 
dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_prod_graph_for_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/01_IIS.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_prod_graph.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/03_graph2hive.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/04_graph2solr.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/05_graph2stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml similarity index 100% rename from dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/06_publish_stats.xml diff --git a/dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml similarity index 100% rename from 
dhp-workflows/dhp-worfklow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml rename to dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/07_broker.xml From 7c0c67bdd66e602bbc169358bd3a678f0edee931 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 13 Aug 2021 17:45:53 +0200 Subject: [PATCH 113/166] added mock pom --- dhp-workflows/dhp-workflow-profiles/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 dhp-workflows/dhp-workflow-profiles/pom.xml diff --git a/dhp-workflows/dhp-workflow-profiles/pom.xml b/dhp-workflows/dhp-workflow-profiles/pom.xml new file mode 100644 index 000000000..b1c51c497 --- /dev/null +++ b/dhp-workflows/dhp-workflow-profiles/pom.xml @@ -0,0 +1,12 @@ + + + + dhp-workflows + eu.dnetlib.dhp + 1.2.4-SNAPSHOT + + 4.0.0 + + dhp-workflow-profiles + + \ No newline at end of file From e5cf11d088655b2dfd3ca76781bb820aa847fbbc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 19 Aug 2021 10:29:04 +0200 Subject: [PATCH 114/166] change open access route to result matching hbm to gold --- .../oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala index db7ab4ac0..0e047d016 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/SparkApplyHostedByMapToResult.scala @@ -30,7 +30,7 @@ object SparkApplyHostedByMapToResult { inst.getHostedby.setValue(ei.getName) if (ei.getOpenAccess) { inst.setAccessright(OafMapperUtils.accessRight(ModelConstants.ACCESS_RIGHT_OPEN, "Open Access", ModelConstants.DNET_ACCESS_MODES, ModelConstants.DNET_ACCESS_MODES)) - inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.hybrid) + inst.getAccessright.setOpenAccessRoute(OpenAccessRoute.gold) p.setBestaccessright(OafMapperUtils.createBestAccessRights(p.getInstance())); } From 812bd54c57d63994fde566bb4f6a41bf1cee5d2f Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:30:14 +0200 Subject: [PATCH 115/166] different funders in blacklist from BETA and PROD aggregator --- .../dhp/provision/00_beta_graph_for_IIS.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml index ef2205e32..2fed35f44 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/00_beta_graph_for_IIS.xml @@ -11,10 +11,20 @@ IIS 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -375,7 +385,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 
'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -421,7 +431,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From a053e1513cf489f997c88d583d2d59a8a10ae378 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 19 Aug 2021 11:32:27 +0200 Subject: [PATCH 116/166] different funders in blacklist from BETA and PROD aggregator --- .../dnetlib/dhp/provision/02_beta_graph.xml | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml index 344e99044..f83337b3c 100644 --- a/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml +++ b/dhp-workflows/dhp-workflow-profiles/src/main/resources/eu/dnetlib/dhp/provision/02_beta_graph.xml @@ -11,10 +11,20 @@ Data Provision 30 - - set blacklist of funder nsPrefixes + + set blacklist of funder nsPrefixes from the beta aggregator - nsPrefixBlacklist + nsPrefixBlacklist_BETA + gsrt________,rcuk________,fct_________ + + + + + + + set blacklist of funder nsPrefixes from the production aggregator + + nsPrefixBlacklist_PROD gsrt________,rcuk________ @@ -497,7 +507,7 @@ 'reuseOAF' : 'reuseOAF_BETA', 'reuseOAF_hdfs' : 'reuseOAFhdfs_BETA', 'contentPath' : 'betaContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_BETA', 'shouldPatchRelations' : 'shouldPatchRelations_BETA', 'idMappingPath' : 'idMappingPath' } @@ -543,7 +553,7 @@ 'reuseOAF' : 'reuseOAF_PROD', 'reuseOAF_hdfs' : 'reuseOAFhdfs_PROD', 'contentPath' : 'prodContentPath', - 'nsPrefixBlacklist' : 'nsPrefixBlacklist', + 'nsPrefixBlacklist' : 'nsPrefixBlacklist_PROD', 'shouldPatchRelations' : 'shouldPatchRelations_PROD', 'idMappingPath' : 'idMappingPath' } From 65822400ce9fc1597a1f7bc5674c536047c55dfd Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:35 +0200 Subject: [PATCH 117/166] CrossrefDump - added new parameter file that was missing --- .../generate_dataset_params.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json new file mode 100644 index 000000000..d903ebe1d --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/generate_dataset_params.json @@ -0,0 +1,16 @@ +[ + {"paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the source path", + "paramRequired": true}, + {"paramName":"m", + "paramLongName":"master", + "paramDescription": "the master name", + "paramRequired": true}, + {"paramName":"t", + "paramLongName":"targetPath", + "paramDescription": "the target path", + "paramRequired": true} + +] + From 
f3b6c392c1655511464f388824ff80ca564410f9 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:10:58 +0200 Subject: [PATCH 118/166] CrossrefDump - moving parameter file under folder crossref_dump_reader --- .../doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/{ => crossref_dump_reader}/crossref_dump_reader.json (100%) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json similarity index 100% rename from dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json rename to dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json From 35880c0e7b35b808c612dfe7efcde4e091d7fd35 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:11:35 +0200 Subject: [PATCH 119/166] CrossrefDump - changed the wf to be able to resume from one of the steps --- .../crossref_dump_reader/oozie_app/workflow.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index 6e4f17912..c915783aa 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -41,6 +41,16 @@ + + + + + ${wf:conf('resumeFrom') eq 'ImportCrossRef'} + ${wf:conf('resumeFrom') eq 'Unpack'} + + + + From 45c62609af435e1fbd93690a8121261476befa8d Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:31 +0200 Subject: [PATCH 120/166] CrossrefDump - modified because parameter file was moved --- .../eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java index c7d3770a0..d1861ff0a 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/crossref/ExtractCrossrefRecords.java @@ -26,7 +26,7 @@ public class ExtractCrossrefRecords { .toString( ExtractCrossrefRecords.class .getResourceAsStream( - "/eu/dnetlib/dhp/doiboost/crossref_dump_reader.json"))); + "/eu/dnetlib/dhp/doiboost/crossref_dump_reader/crossref_dump_reader.json"))); parser.parseArgument(args); final String hdfsServerUri = parser.get("hdfsServerUri"); final String workingPath = hdfsServerUri.concat(parser.get("workingPath")); From 882abb40e466a2232a879b9113a0c28b2362848b Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 20 Aug 2021 11:12:53 +0200 Subject: [PATCH 121/166] CrossrefDump - --- .../dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml 
b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml index c915783aa..4a98a4bb4 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/crossref_dump_reader/oozie_app/workflow.xml @@ -51,9 +51,6 @@ - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] From 4c1474e6938cdd9cef37cec895b8aa232ac59758 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 20 Aug 2021 17:03:30 +0200 Subject: [PATCH 122/166] Dealing with #6859#note-2: we have to decode URLs to avoid & and other chars encoded becasue of the original XML representation of data --- .../dhp/oa/graph/raw/OafToOafMapper.java | 9 +++ .../dhp/oa/graph/raw/OdfToOafMapper.java | 18 ++++- .../dnetlib/dhp/oa/graph/raw/MappersTest.java | 35 +++++++++ .../dnetlib/dhp/oa/graph/raw/encoded-url.xml | 40 ++++++++++ .../dhp/oa/graph/raw/encoded-url_odf.xml | 75 +++++++++++++++++++ 5 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 2b49a9dc1..5f9491029 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -164,6 +166,13 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .filter(n -> StringUtils.isNotBlank(n.getText())) .map(n -> n.getText().trim()) .filter(u -> u.startsWith("http")) + .map(s -> { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return s; + } + }) .distinct() .collect(Collectors.toCollection(ArrayList::new))); diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 02aab4f16..a365ac9aa 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -4,6 +4,8 @@ package eu.dnetlib.dhp.oa.graph.raw; import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.*; import java.util.stream.Collectors; @@ -137,17 +139,17 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { final Set url = new HashSet<>(); for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='URL']")) { - url.add(((Node) o).getText().trim()); + 
url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='URL']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc.selectNodes("//*[local-name()='identifier' and ./@identifierType='landingPage']")) { - url.add(((Node) o).getText().trim()); + url.add(trimAndDecodeUrl(((Node) o).getText().trim())); } for (final Object o : doc .selectNodes("//*[local-name()='alternateIdentifier' and ./@alternateIdentifierType='DOI']")) { @@ -163,6 +165,14 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { return Arrays.asList(instance); } + protected String trimAndDecodeUrl(String url){ + try { + return URLDecoder.decode(url.trim(), "UTF-8"); + } catch (UnsupportedEncodingException e) { + return url; + } + } + @Override protected List> prepareSources(final Document doc, final DataInfo info) { return new ArrayList<>(); // Not present in ODF ??? diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index 5228d1d02..cc903b49e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Objects; import java.util.Optional; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; @@ -759,6 +760,40 @@ class MappersTest { assertEquals("UNKNOWN", p.getInstance().get(0).getRefereed().getClassid()); } + + @Test + void testXMLEncodedURL() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url.xml"))); + final List list = new OafToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Publication p = (Publication) list.get(0); + assertTrue(p.getInstance().size() > 0); + + String decoded = "https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS"; + assertEquals(decoded, p.getInstance().get(0).getUrl().get(0)); + } + + @Test + void testXMLEncodedURL_ODF() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("encoded-url_odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + + final Dataset p = (Dataset) list.get(0); + assertTrue(p.getInstance().size() > 0); + for(String url : p.getInstance().get(0).getUrl()){ + System.out.println(url); + assertTrue(!url.contains("&")); + } + } + private void assertValidId(final String id) { // System.out.println(id); diff --git 
a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml new file mode 100644 index 000000000..c9cafea4f --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url.xml @@ -0,0 +1,40 @@ + + +
+ r3c4b2081b22::0001f1a60b1acbb28dd66b4f6c7d881f + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + 2021-06-24T10:40:25.346Z + r3c4b2081b22 + 2021-06-24T10:40:55.153Z +
+ + https://www.ec.europa.eu/research/participants/documents/downloadPublic?documentIds=080166e5af388993&appId=PPGMS + Progress report on preparing Strategic plan for Technology transfer from EPPL/FMPI CU + OPEN + + Documents, reports + 0034 + Progress report on preparation process of the technology transfer plan for CU. + corda__h2020::692335 + + + + + file%3A%2F%2F%2Fvar%2Flib%2Fdnet%2Fdata%2Fopendata%2Fcordis-h2020projectDeliverables.tsv + + + + + + + false + false + 0.9 + + + +
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml new file mode 100644 index 000000000..b970605a6 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/encoded-url_odf.xml @@ -0,0 +1,75 @@ + + + + opentrials__::0000bf8e63d3d7e6b88421eabafae3f6 + feabb67c-1fd1-423b-aec6-606d04ce53c6 + 2019-03-27T15:15:22.22Z + opentrials__ + 2019-04-17T16:04:20.586Z + + + + https://clinicaltrials.gov/ct2/show/NCT02321059&test=yes + + http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059&test=yes + NCT02321059 + + + + Jensen, Kristian K + + + + Validation of the Goodstrength System for Assessment of Abdominal Wall Strength in Patients With Incisional Hernia + + nct + + Denmark + + 0037 + + Patients with an incisional hernia in the midline and controls with an intact abdominal wall are examined twice with one week apart, in order to establish the test-retest reliability and internal and external validity of the Goodstrength trunk dynamometer. + + + OPEN + 0037 + 2014-11-11 + + + + + false + false + 0.9 + + + + + + + + + file:///var/lib/dnet/data/opentrials/opentrials.csv + + + + + + + false + false + 0.9 + + + + + \ No newline at end of file From 412d2cb16a6c3f6352ec720cf2e2bf591ca00f7d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 14:32:00 +0200 Subject: [PATCH 123/166] added dependencies to classgraph and opencsv. Bumped version of dhp-schemas --- pom.xml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1a3523f2b..99525ef85 100644 --- a/pom.xml +++ b/pom.xml @@ -206,6 +206,12 @@ 1.0.7 + + me.xuender + unidecode + 0.0.7 + + com.google.guava guava @@ -514,6 +520,17 @@ ${common.text.version} + + com.opencsv + opencsv + 5.5 + + + io.github.classgraph + classgraph + 4.8.71 + + @@ -736,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.15-SNAPSHOT] + [2.7.17] [4.0.3] [6.0.5] [3.1.6] From f19b04d41b0a13503a7c5e559c4bf3b330559c75 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 14:33:39 +0200 Subject: [PATCH 124/166] code formatting after mvn compile --- .../raw/MigrateDbEntitiesApplicationTest.java | 166 +++++++++--------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java index 2078735f3..b65bd9fd8 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MigrateDbEntitiesApplicationTest.java @@ -51,8 +51,11 @@ public class MigrateDbEntitiesApplicationTest { public void setUp() { lenient() .when(vocs.getTermAsQualifier(anyString(), anyString())) - .thenAnswer(invocation -> OafMapperUtils - .qualifier(invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), invocation.getArgument(0))); + .thenAnswer( + invocation -> OafMapperUtils + .qualifier( + invocation.getArgument(1), invocation.getArgument(1), invocation.getArgument(0), + invocation.getArgument(0))); lenient().when(vocs.termExists(anyString(), anyString())).thenReturn(true); @@ -235,72 +238,73 @@ public class 
MigrateDbEntitiesApplicationTest { private List prepareMocks(final String jsonFile) throws IOException, SQLException { final String json = IOUtils.toString(getClass().getResourceAsStream(jsonFile)); final ObjectMapper mapper = new ObjectMapper(); - final List list = mapper.readValue(json, new TypeReference>() {}); + final List list = mapper.readValue(json, new TypeReference>() { + }); for (final TypedField tf : list) { if (tf.getValue() == null) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); - break; - case "date": - Mockito.when(rs.getDate(tf.getField())).thenReturn(null); - break; - case "int": - Mockito.when(rs.getInt(tf.getField())).thenReturn(0); - break; - case "double": - Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); - break; - case "array": - Mockito.when(rs.getArray(tf.getField())).thenReturn(null); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(null); - break; + case "not_used": + break; + case "boolean": + Mockito.when(rs.getBoolean(tf.getField())).thenReturn(false); + break; + case "date": + Mockito.when(rs.getDate(tf.getField())).thenReturn(null); + break; + case "int": + Mockito.when(rs.getInt(tf.getField())).thenReturn(0); + break; + case "double": + Mockito.when(rs.getDouble(tf.getField())).thenReturn(0.0); + break; + case "array": + Mockito.when(rs.getArray(tf.getField())).thenReturn(null); + break; + case "string": + default: + Mockito.when(rs.getString(tf.getField())).thenReturn(null); + break; } } else { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito - .when(rs.getBoolean(tf.getField())) - .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); - break; - case "date": - Mockito - .when(rs.getDate(tf.getField())) - .thenReturn(Date.valueOf(tf.getValue().toString())); - break; - case "int": - Mockito - .when(rs.getInt(tf.getField())) - .thenReturn(new Integer(tf.getValue().toString())); - break; - case "double": - Mockito - .when(rs.getDouble(tf.getField())) - .thenReturn(new Double(tf.getValue().toString())); - break; - case "array": - final Array arr = Mockito.mock(Array.class); - final String[] values = ((List) tf.getValue()) - .stream() - .filter(Objects::nonNull) - .map(o -> o.toString()) - .toArray(String[]::new); + case "not_used": + break; + case "boolean": + Mockito + .when(rs.getBoolean(tf.getField())) + .thenReturn(Boolean.parseBoolean(tf.getValue().toString())); + break; + case "date": + Mockito + .when(rs.getDate(tf.getField())) + .thenReturn(Date.valueOf(tf.getValue().toString())); + break; + case "int": + Mockito + .when(rs.getInt(tf.getField())) + .thenReturn(new Integer(tf.getValue().toString())); + break; + case "double": + Mockito + .when(rs.getDouble(tf.getField())) + .thenReturn(new Double(tf.getValue().toString())); + break; + case "array": + final Array arr = Mockito.mock(Array.class); + final String[] values = ((List) tf.getValue()) + .stream() + .filter(Objects::nonNull) + .map(o -> o.toString()) + .toArray(String[]::new); - Mockito.when(arr.getArray()).thenReturn(values); - Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); - break; - case "string": - default: - Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); - break; + Mockito.when(arr.getArray()).thenReturn(values); + Mockito.when(rs.getArray(tf.getField())).thenReturn(arr); + break; + case "string": + default: + 
Mockito.when(rs.getString(tf.getField())).thenReturn(tf.getValue().toString()); + break; } } } @@ -312,27 +316,27 @@ public class MigrateDbEntitiesApplicationTest { for (final TypedField tf : list) { switch (tf.getType()) { - case "not_used": - break; - case "boolean": - Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); - break; - case "date": - Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); - break; - case "int": - Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); - break; - case "double": - Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); - break; - case "array": - Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); - break; - case "string": - default: - Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); - break; + case "not_used": + break; + case "boolean": + Mockito.verify(rs, Mockito.atLeastOnce()).getBoolean(tf.getField()); + break; + case "date": + Mockito.verify(rs, Mockito.atLeastOnce()).getDate(tf.getField()); + break; + case "int": + Mockito.verify(rs, Mockito.atLeastOnce()).getInt(tf.getField()); + break; + case "double": + Mockito.verify(rs, Mockito.atLeastOnce()).getDouble(tf.getField()); + break; + case "array": + Mockito.verify(rs, Mockito.atLeastOnce()).getArray(tf.getField()); + break; + case "string": + default: + Mockito.verify(rs, Mockito.atLeastOnce()).getString(tf.getField()); + break; } } } From 00a28c008079266c54d7ac07d0646bf80ff80c2d Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Mon, 23 Aug 2021 15:02:21 +0200 Subject: [PATCH 125/166] originalId was renamed to acronym --- .../dhp/oa/graph/dump/complete/CreateEntityTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java index f881e6b30..b5f73da71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/dump/complete/CreateEntityTest.java @@ -97,7 +97,7 @@ public class CreateEntityTest { Assertions.assertEquals(12, riList.size()); riList.stream().forEach(c -> { - switch (c.getOriginalId()) { + switch (c.getAcronym()) { case "mes": Assertions .assertTrue(c.getType().equals(eu.dnetlib.dhp.oa.graph.dump.Constants.RESEARCH_COMMUNITY)); @@ -115,9 +115,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_mes")); - Assertions.assertTrue("mes".equals(c.getOriginalId())); + Assertions.assertTrue("mes".equals(c.getAcronym())); break; case "clarin": Assertions @@ -130,9 +130,9 @@ public class CreateEntityTest { String .format( "%s|%s::%s", Constants.CONTEXT_ID, Constants.CONTEXT_NS_PREFIX, - DHPUtils.md5(c.getOriginalId())))); + DHPUtils.md5(c.getAcronym())))); Assertions.assertTrue(c.getZenodo_community().equals("https://zenodo.org/communities/oac_clarin")); - Assertions.assertTrue("clarin".equals(c.getOriginalId())); + Assertions.assertTrue("clarin".equals(c.getAcronym())); break; } // TODO add check for all the others Entities From 45898c71acea6ce2fc3b969dde4189083eb6b03a Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo 
Date: Tue, 24 Aug 2021 15:20:04 +0200 Subject: [PATCH 126/166] fixed wrong doi in pubmed --- .../dhp/sx/graph/SparkResolveRelation.scala | 12 +- .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 28 +- .../sx/graph/bio/pubmed/BioScholixTest.scala | 13 +- .../sx/graph/scholix/ScholixGraphTest.scala | 11 + .../dnetlib/dhp/sx/graph/scholix/result.json | 508 ++++++++++++++++++ 5 files changed, 558 insertions(+), 14 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala index b2fddec20..1b13b81c7 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/SparkResolveRelation.scala @@ -80,7 +80,7 @@ object SparkResolveRelation { } - private def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { + def extractPidsFromRecord(input:String):(String,List[(String,String)]) = { implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: json4s.JValue = parse(input) val id:String = (json \ "id").extract[String] @@ -90,7 +90,15 @@ object SparkResolveRelation { JField("qualifier", JObject(qualifier)) <- pids JField("classname", JString(pidType)) <- qualifier } yield (pidValue, pidType) - (id,result) + + val alternateIds: List[(String,String)] = for { + JObject(pids) <- json \\ "alternateIdentifier" + JField("value", JString(pidValue)) <- pids + JField("qualifier", JObject(qualifier)) <- pids + JField("classname", JString(pidType)) <- qualifier + } yield (pidValue, pidType) + + (id,result:::alternateIds) } private def extractPidResolvedTableFromJsonRDD(spark: SparkSession, entityPath: String, workingPath: String) = { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index 9a49deebc..377bd902d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -1,8 +1,8 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed - +import java.util.regex.Pattern import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} +import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ import scala.collection.JavaConverters._ @@ -15,6 +15,20 @@ object PubMedToOaf { "doi" -> "https://dx.doi.org/" ) + def cleanDoi(doi:String):String = { + + val regex = "10.\\d{4,9}\\/[-._;()\\/:A-Z0-9]+$" + + + val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) + val matcher = pattern.matcher(doi) + + if (matcher.find) { + return matcher.group(0) + } + null + } + def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = { val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid) result_typologies.getClassid match { @@ -60,8 +74,12 @@ object 
PubMedToOaf { var pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) if (pidList == null) return null + + var alternateIdentifier :StructuredProperty = null if (article.getDoi != null) { - pidList = pidList ::: List(OafMapperUtils.structuredProperty(article.getDoi, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)) + val normalizedPid = cleanDoi(article.getDoi) + if (normalizedPid!= null) + alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo) } // If the article contains the typology Journal Article then we apply this type @@ -84,9 +102,9 @@ object PubMedToOaf { return result result.setDataInfo(dataInfo) i.setPid(pidList.asJava) + if (alternateIdentifier!= null) + i.setAlternateIdentifier(List(alternateIdentifier).asJava) result.setInstance(List(i).asJava) - - i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection breakOut) val urlLists: List[String] = pidList .map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue)) diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index 894c0a795..8e063db7c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -1,9 +1,12 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, SerializationFeature} +import eu.dnetlib.dhp.schema.common.ModelConstants +import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF +import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -64,6 +67,9 @@ class BioScholixTest extends AbstractVocabularyTest{ assertEquals(10, r.size) assertTrue(r.map(p => p.asInstanceOf[Result]).flatMap(p => p.getInstance().asScala.map(i => i.getInstancetype.getClassid)).exists(p => "0037".equalsIgnoreCase(p))) println(mapper.writeValueAsString(r.head)) + + + } @@ -179,13 +185,6 @@ class BioScholixTest extends AbstractVocabularyTest{ val result:List[Oaf] = l.map(s => BioDBToOAF.scholixResolvedToOAF(s)) assertTrue(result.nonEmpty) - - - - - - - } } diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala index aaac2b7ee..5b7fbe1cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/scholix/ScholixGraphTest.scala @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, Ser import 
eu.dnetlib.dhp.schema.oaf.{Relation, Result} import eu.dnetlib.dhp.schema.sx.scholix.Scholix import eu.dnetlib.dhp.schema.sx.summary.ScholixSummary +import eu.dnetlib.dhp.sx.graph.SparkResolveRelation import eu.dnetlib.dhp.sx.graph.bio.pubmed.AbstractVocabularyTest import org.json4s import org.json4s.DefaultFormats @@ -30,6 +31,16 @@ class ScholixGraphTest extends AbstractVocabularyTest{ } + @Test + def testExtractPids():Unit = { + + val input = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/scholix/result.json")).mkString + val res =SparkResolveRelation.extractPidsFromRecord(input) + assertNotNull(res) + assertTrue(res._2.size == 2) + + } + @Test def testOAFToSummary():Unit= { val inputRelations = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/oaf_to_summary")).mkString diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json new file mode 100644 index 000000000..f14d5fedf --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/scholix/result.json @@ -0,0 +1,508 @@ +{ + "collectedfrom": [ + { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + } + ], + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + }, + "lastupdatetimestamp": null, + "id": "50|pmid________::cd23b96c02d937c971c1b56d6aa0bf4f", + "originalId": [ + "10025635" + ], + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofcollection": null, + "dateoftransformation": null, + "extraInfo": null, + "oaiprovenance": null, + "measures": null, + "author": [ + { + "fullname": "D J, Marcellin-Little", + "name": "D J", + "surname": "Marcellin-Little", + "rank": 1, + "pid": null, + "affiliation": null + }, + { + "fullname": "B A, DeYoung", + "name": "B A", + "surname": "DeYoung", + "rank": 2, + "pid": null, + "affiliation": null + }, + { + "fullname": "D H, Doyens", + "name": "D H", + "surname": "Doyens", + "rank": 3, + "pid": null, + "affiliation": null + }, + { + "fullname": "D J, DeYoung", + "name": "D J", + "surname": "DeYoung", + "rank": 4, + "pid": null, + "affiliation": null + } + ], + "resulttype": { + "classid": "dataset", + "classname": "dataset", + "schemeid": "dnet:result_typologies", + "schemename": "dnet:result_typologies" + }, + "language": null, + "country": null, + "subject": [ + { + "value": "Animals", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + 
"inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Arthroplasty, Replacement, Hip", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Dogs", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Follow-Up Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Joint", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Hip Prosthesis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Osseointegration", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": 
"Prospective Studies", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Radiography", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Survival Analysis", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + { + "value": "Treatment Outcome", + "qualifier": { + "classid": "keywords", + "classname": "keywords", + "schemeid": "dnet:subject_classification_typologies", + "schemename": "dnet:subject_classification_typologies" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "title": [ + { + "value": "Canine uncemented porous-coated anatomic total hip arthroplasty: results of a long-term prospective evaluation of 50 consecutive cases.", + "qualifier": { + "classid": "main title", + "classname": "main title", + "schemeid": "dnet:dataCite_title", + "schemename": "dnet:dataCite_title" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "relevantdate": null, + "description": [ + { + "value": "To evaluate the long-term clinical and radiographic results of a canine uncemented porous-coated anatomic (PCA) total hip arthroplasty (THA).Prospective study of consecutive clinical patients using survival analysis.Forty-one dogs that underwent PCA THA; nine had bilateral PCA THA (50 prostheses).Gait observation, orthopedic examination, and radiographic assessment were conducted before THA, 6 months after THA, and yearly thereafter. A zonal analysis system was used to document osseous changes in the femur and the acetabulum. 
Acetabular cup and femoral stem subsidence and migration, femoral canal fill, and implant orientation were measured. Survival analysis of the procedure was conducted.Long-term follow-up was available for 37 dogs (46 prostheses). The median follow-up was 63 months. Limb function was normal for 37 limbs and abnormal for 9 limbs because of dislocation (n = 3), lumbosacral disease (n = 2), degenerative myelopathy (n = 1), autoimmune disease (n = 1), brain tumor (n = 1), or osteosarcoma of the femur (n = 1). All prosthetic stems and cups were fixed by bone ingrowth fixation. Osteolysis was not observed. Bone infarction occurred in five femoral canals (four dogs). The 6-year survival rate for the procedure was 87% (95% confidence interval, 72%-96%).Long-term fixation of the uncemented PCA acetabular cup and stem is successful in dogs, and long-term clinical function is excellent.", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "publisher": null, + "embargoenddate": null, + "source": null, + "fulltext": null, + "format": null, + "contributor": null, + "resourcetype": null, + "coverage": null, + "bestaccessright": null, + "context": null, + "externalReference": null, + "instance": [ + { + "license": null, + "accessright": null, + "instancetype": { + "classid": "0037", + "classname": "Clinical Trial", + "schemeid": "dnet:publication_resource", + "schemename": "dnet:publication_resource" + }, + "hostedby": null, + "url": [ + "https://pubmed.ncbi.nlm.nih.gov/10025635" + ], + "distributionlocation": null, + "collectedfrom": { + "key": "10|opendoar____::8b6dd7db9af49e67306feb59a8bdc52c", + "value": "Europe PubMed Central", + "dataInfo": null + }, + "pid": [ + { + "value": "10025635", + "qualifier": { + "classid": "pmid", + "classname": "pmid", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "alternateIdentifier": [ + { + "value": "10.1053/jvet.1999.0010", + "qualifier": { + "classid": "doi", + "classname": "doi", + "schemeid": "dnet:pid_types", + "schemename": "dnet:pid_types" + }, + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + } + ], + "dateofacceptance": { + "value": "1999-02-20", + "dataInfo": { + "invisible": false, + "inferred": false, + "deletedbyinference": false, + "trust": "0.9", + "inferenceprovenance": 
null, + "provenanceaction": { + "classid": "sysimport:actionset", + "classname": "sysimport:actionset", + "schemeid": "dnet:provenanceActions", + "schemename": "dnet:provenanceActions" + } + } + }, + "processingchargeamount": null, + "processingchargecurrency": null, + "refereed": null + } + ], + "storagedate": null, + "device": null, + "size": null, + "version": null, + "lastmetadataupdate": null, + "metadataversionnumber": null, + "geolocation": null +} From ccf4103a25ccfc29d4621190e1df821d399bc929 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Wed, 25 Aug 2021 10:07:58 +0200 Subject: [PATCH 127/166] keep the original url if the decoder fails for any reason --- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java | 2 +- .../main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index 5f9491029..b7afd3595 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -169,7 +169,7 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { .map(s -> { try { return URLDecoder.decode(s, "UTF-8"); - } catch (UnsupportedEncodingException e) { + } catch (Throwable t) { return s; } }) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index 440dbaf6c..194715295 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -168,7 +168,7 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { protected String trimAndDecodeUrl(String url) { try { return URLDecoder.decode(url.trim(), "UTF-8"); - } catch (UnsupportedEncodingException e) { + } catch (Throwable t) { return url; } } From e8b3cb9147d63380d64c4ed63e977f57fa42d161 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 30 Aug 2021 09:32:21 +0200 Subject: [PATCH 128/166] Implemented method to download delta updates in EBI Links --- .../datacite/AbstractRestClient.scala | 33 ++--- .../dhp/sx/graph/bio/pubmed/PubMedToOaf.scala | 6 +- .../sx/graph/ebi/SparkDownloadEBILinks.scala | 115 ++++++++++++++++++ .../dhp/sx/graph/ebi/ebi_download_update.json | 5 + .../ebi/update/oozie_app/config-default.xml | 68 +++++++++++ .../graph/ebi/update/oozie_app/workflow.xml | 59 +++++++++ 6 files changed, 269 insertions(+), 17 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala 
b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala index 92a870e37..bae41b218 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/AbstractRestClient.scala @@ -8,14 +8,15 @@ import org.apache.http.impl.client.{HttpClientBuilder, HttpClients} import java.io.IOException -abstract class AbstractRestClient extends Iterator[String]{ + +abstract class AbstractRestClient extends Iterator[String] { var buffer: List[String] = List() - var current_index:Int = 0 + var current_index: Int = 0 var scroll_value: Option[String] = None - var complete:Boolean = false + var complete: Boolean = false def extractInfo(input: String): Unit @@ -23,13 +24,13 @@ abstract class AbstractRestClient extends Iterator[String]{ protected def getBufferData(): Unit - def doHTTPGETRequest(url:String): String = { + def doHTTPGETRequest(url: String): String = { val httpGet = new HttpGet(url) doHTTPRequest(httpGet) } - def doHTTPPOSTRequest(url:String, json:String): String = { + def doHTTPPOSTRequest(url: String, json: String): String = { val httpPost = new HttpPost(url) if (json != null) { val entity = new StringEntity(json) @@ -46,7 +47,7 @@ abstract class AbstractRestClient extends Iterator[String]{ override def next(): String = { - val next_item:String = buffer(current_index) + val next_item: String = buffer(current_index) current_index = current_index + 1 if (current_index == buffer.size) getBufferData() @@ -54,17 +55,16 @@ abstract class AbstractRestClient extends Iterator[String]{ } - - - private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={ + private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { val timeout = 60; // seconds val config = RequestConfig.custom() .setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000) .setSocketTimeout(timeout * 1000).build() - val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build() - var tries = 4 - while (tries > 0) { + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { println(s"requesting ${r.getURI}") try { val response = client.execute(r) @@ -78,10 +78,15 @@ abstract class AbstractRestClient extends Iterator[String]{ case e: Throwable => println(s"Error on requesting ${r.getURI}") e.printStackTrace() - tries-=1 + tries -= 1 } } "" - } + } finally { + if (client != null) + client.close() + } + } + getBufferData() } \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala index 377bd902d..202eb7b14 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/PubMedToOaf.scala @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.sx.graph.bio.pubmed -import java.util.regex.Pattern import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup import eu.dnetlib.dhp.schema.common.ModelConstants -import eu.dnetlib.dhp.schema.oaf.utils.{CleaningFunctions, GraphCleaningFunctions, IdentifierFactory, OafMapperUtils, PidType} import eu.dnetlib.dhp.schema.oaf._ +import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, IdentifierFactory, 
OafMapperUtils, PidType} +import java.util.regex.Pattern import scala.collection.JavaConverters._ object PubMedToOaf { @@ -17,7 +17,7 @@ object PubMedToOaf { def cleanDoi(doi:String):String = { - val regex = "10.\\d{4,9}\\/[-._;()\\/:A-Z0-9]+$" + val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$" val pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala new file mode 100644 index 000000000..08e060459 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.sx.graph.ebi + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem +import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} +import org.apache.commons.io.IOUtils +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.{HttpGet, HttpUriRequest} +import org.apache.http.impl.client.HttpClientBuilder +import org.apache.spark.SparkConf +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.max +import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.slf4j.{Logger, LoggerFactory} + +object SparkDownloadEBILinks { + + + def createEBILinks(pmid:Long):EBILinkItem = { + + val res = requestLinks(pmid) + if (res!=null) + return EBILinkItem(pmid, res) + null + } + + + def requestLinks(PMID:Long):String = { + val r = new HttpGet(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + + } + def main(args: Array[String]): Unit = { + + val log: Logger = LoggerFactory.getLogger(getClass) + val MAX_ITEM_PER_PARTITION = 20000 + val conf: SparkConf = new SparkConf() + val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json"))) + parser.parseArgument(args) + val spark: SparkSession = + SparkSession + .builder() + .config(conf) + .appName(SparkEBILinksToOaf.getClass.getSimpleName) + .master(parser.get("master")).getOrCreate() + + import spark.implicits._ + + implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) + implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) + implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) + + val sourcePath = parser.get("sourcePath") + log.info(s"sourcePath -> $sourcePath") + val workingPath = parser.get("workingPath") + 
log.info(s"workingPath -> $workingPath") + + log.info("Getting max pubmedId where the links have been requested") + val links:Dataset[EBILinkItem] = spark.read.load(s"$sourcePath/ebi_links_dataset").as[EBILinkItem] + val lastPMIDRequested =links.map(l => l.id).select(max("value")).first.getLong(0) + + log.info("Retrieving PMID to request links") + val pubmed = spark.read.load(s"$sourcePath/baseline_dataset").as[PMArticle] + pubmed.map(p => p.getPmid.toLong).where(s"value > $lastPMIDRequested").write.mode(SaveMode.Overwrite).save(s"$workingPath/id_to_request") + + val pmidToReq:Dataset[Long] = spark.read.load(s"$workingPath/id_to_request").as[Long] + + val total = pmidToReq.count() + + spark.createDataset(pmidToReq.rdd.repartition((total/MAX_ITEM_PER_PARTITION).toInt).map(pmid =>createEBILinks(pmid)).filter(l => l!= null)).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_update") + + val updates:Dataset[EBILinkItem] =spark.read.load(s"$workingPath/links_update").as[EBILinkItem] + + links.union(updates).groupByKey(_.id) + .reduceGroups{(x,y) => + if (x == null || x.links ==null) + y + if (y ==null || y.links ==null) + x + if (x.links.length > y.links.length) + x + else + y + }.map(_._2).write.mode(SaveMode.Overwrite).save(s"$workingPath/links_final") + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json new file mode 100644 index 000000000..0ae19234a --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the working path ", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml new file mode 100644 index 000000000..17cd6c9a3 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/config-default.xml @@ -0,0 +1,68 @@ + + + + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + + + + + + + + + + + + + + + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + "com.cloudera.spark.lineage.NavigatorAppListener" + + + spark2SqlQueryExecutionListeners + "com.cloudera.spark.lineage.NavigatorQueryListener" + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml new file mode 100644 index 000000000..1b738caed --- /dev/null +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml @@ -0,0 +1,59 @@ + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + yarn-cluster + cluster + Incremental Download EBI Links + eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + \ No newline at end of file From 3762b17f7b557035dbd680e313554ffb11dfcd54 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Tue, 31 Aug 2021 20:20:05 +0200 Subject: [PATCH 129/166] added VERSIOn and PART relationship and re-ordered according to my personal and obviously possibly biased ordering --- .../dhp/oa/provision/model/SortableRelationKey.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java index 463c15e9e..cf441a517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/SortableRelationKey.java @@ -18,17 +18,18 @@ public class SortableRelationKey implements Comparable, Ser static { weights.put(ModelConstants.PARTICIPATION, 0); - weights.put(ModelConstants.OUTCOME, 1); weights.put(ModelConstants.AFFILIATION, 2); weights.put(ModelConstants.DEDUP, 3); weights.put(ModelConstants.PUBLICATION_DATASET, 4); - weights.put(ModelConstants.CITATION, 5); - weights.put(ModelConstants.SUPPLEMENT, 6); - weights.put(ModelConstants.REVIEW, 7); - weights.put(ModelConstants.RELATIONSHIP, 8); + weights.put(ModelConstants.SUPPLEMENT, 5); + weights.put(ModelConstants.REVIEW, 6); + weights.put(ModelConstants.RELATIONSHIP, 7); + weights.put(ModelConstants.PART, 8); weights.put(ModelConstants.PROVISION, 9); - weights.put(ModelConstants.SIMILARITY, 10); + weights.put(ModelConstants.VERSION, 10); + weights.put(ModelConstants.SIMILARITY, 11); + weights.put(ModelConstants.CITATION, 12); } private static final long serialVersionUID = 3232323; From 9f8a80deb739893d71eb72e396d964f12f549681 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 1 Sep 2021 14:16:27 +0200 Subject: [PATCH 130/166] fixed wrong import of unresolved relation in openaire --- .../dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml | 2 +- .../java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml 
b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml index 021704f54..c6332ff7d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/datacite/oozie_app/workflow.xml @@ -70,7 +70,7 @@ --sourcePath${mainPath}/datacite_dump --targetPath${mainPath}/datacite_oaf --isLookupUrl${isLookupUrl} - --exportLinkstrue + --exportLinksfalse --masteryarn-cluster diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java index ae899c3d8..fdf397ad7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PrepareRelationsJob.java @@ -131,6 +131,7 @@ public class PrepareRelationsJob { Set relationFilter, int sourceMaxRelations, int targetMaxRelations, int relPartitions) { JavaRDD rels = readPathRelationRDD(spark, inputRelationsPath) + .filter(rel -> !(rel.getSource().startsWith("unresolved") || rel.getTarget().startsWith("unresolved"))) .filter(rel -> !rel.getDataInfo().getDeletedbyinference()) .filter(rel -> !relationFilter.contains(StringUtils.lowerCase(rel.getRelClass()))); From d4dadf6d77b102c6eec63386f291a3d37d5942b2 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Thu, 2 Sep 2021 14:21:24 +0200 Subject: [PATCH 131/166] reduced max number of PID in Relatedentity --- .../CreateRelatedEntitiesJob_phase1.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java index b2abbc156..a33a45517 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/CreateRelatedEntitiesJob_phase1.java @@ -101,13 +101,22 @@ public class CreateRelatedEntitiesJob_phase1 { Encoders.tuple(Encoders.STRING(), Encoders.kryo(Relation.class))) .cache(); - final Dataset> entities = readPathEntity(spark, inputEntityPath, clazz) + readPathEntity(spark, inputEntityPath, clazz) .filter("dataInfo.invisible == false") .map( (MapFunction>) e -> new Tuple2<>(e.getId(), asRelatedEntity(e, clazz)), Encoders .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))) - .cache(); + .write() + .mode(SaveMode.Overwrite) + .save("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()); + + final Dataset> entities = spark + .read() + .load("/tmp/beta_provision/working_dir/update_solr/join_partial/relatedEntities/" + clazz.getSimpleName()) + .as( + Encoders + .tuple(Encoders.STRING(), Encoders.kryo(RelatedEntity.class))); relsByTarget .joinWith(entities, entities.col("_1").equalTo(relsByTarget.col("_1")), "inner") @@ -140,7 +149,8 @@ public class CreateRelatedEntitiesJob_phase1 { re.setId(entity.getId()); re.setType(EntityType.fromClass(clazz).name()); - re.setPid(entity.getPid()); + if (entity.getPid() != null) + re.setPid(entity.getPid().stream().limit(400).collect(Collectors.toList())); 
re.setCollectedfrom(entity.getCollectedfrom()); switch (EntityType.fromClass(clazz)) { From 3c6fc2096ca0df92f0b83ce406ac6218af02a7a5 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 7 Sep 2021 10:46:26 +0200 Subject: [PATCH 132/166] fix bug on oai iterator that skip record cleaned --- .../java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 3f767fd31..566c6b216 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -161,7 +161,7 @@ public class OaiIterator implements Iterator { report.put(e.getClass().getName(), e.getMessage()); final String cleaned = XmlCleaner.cleanAllEntities(xml); try { - doc = DocumentHelper.parseText(xml); + doc = DocumentHelper.parseText(cleaned); } catch (final DocumentException e1) { final String resumptionToken = extractResumptionToken(xml); if (resumptionToken == null) { From aed29156c78192ba4c2841fc6ae179c391f1b088 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 7 Sep 2021 19:05:46 +0200 Subject: [PATCH 133/166] changed behavior in transformation job, that doesn't fail at first error --- .../eu/dnetlib/dhp/transformation/TransformSparkJobNode.java | 4 +++- .../dhp/transformation/xslt/XSLTTransformationFunction.java | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java index 4fe79bf76..ed867c7f2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*; import java.io.IOException; import java.util.Map; +import java.util.Objects; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -126,7 +127,8 @@ public class TransformSparkJobNode { JavaRDD mdstore = inputMDStore .javaRDD() .repartition(getRepartitionNumber(totalInput, rpt)) - .map((Function) x::call); + .map((Function) x::call) + .filter((Function) Objects::nonNull); saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH); log.info("Transformed item {}", ct.getProcessedItems().count()); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index acf48ccc5..54192a7be 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -81,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction Date: Tue, 7 Sep 2021 23:17:13 +0300 Subject: [PATCH 134/166] added indicators of sprint 2 in monitor db --- .../scripts/step20-createMonitorDB.sql | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 
deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql index 5da028304..9ea50d488 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step20-createMonitorDB.sql @@ -104,25 +104,42 @@ create table TARGET.project_results as select id as result, project as id from T compute stats TARGET.project_results; -- indicators -create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_green_oa; - -create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_grey_lit; - -create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_doi_from_crossref; - -create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); -compute stats TARGET.indi_pub_gold_oa; - +create view TARGET.indi_dataset_avg_year_content_oa as select * from SOURCE.indi_dataset_avg_year_content_oa orig; +create view TARGET.indi_dataset_avg_year_context_oa as select * from SOURCE.indi_dataset_avg_year_context_oa orig; create view TARGET.indi_dataset_avg_year_country_oa as select * from SOURCE.indi_dataset_avg_year_country_oa orig; + +create view TARGET.indi_other_avg_year_content_oa as select * from SOURCE.indi_other_avg_year_content_oa orig; +create view TARGET.indi_other_avg_year_context_oa as select * from SOURCE.indi_other_avg_year_context_oa orig; +create view TARGET.indi_other_avg_year_country_oa as select * from SOURCE.indi_other_avg_year_country_oa orig; + create view TARGET.indi_project_datasets_count as select * from SOURCE.indi_project_datasets_count orig; create view TARGET.indi_project_otherresearch_count as select * from SOURCE.indi_project_otherresearch_count orig; create view TARGET.indi_project_pubs_count as select * from SOURCE.indi_project_pubs_count orig; create view TARGET.indi_project_software_count as select * from SOURCE.indi_project_software_count orig; + +create view TARGET.indi_pub_avg_year_content_oa as select * from SOURCE.indi_pub_avg_year_content_oa orig; +create view TARGET.indi_pub_avg_year_context_oa as select * from SOURCE.indi_pub_avg_year_context_oa orig; create view TARGET.indi_pub_avg_year_country_oa as select * from SOURCE.indi_pub_avg_year_country_oa orig; +create table TARGET.indi_pub_green_oa as select * from SOURCE.indi_pub_green_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_green_oa; +create table TARGET.indi_pub_grey_lit as select * from SOURCE.indi_pub_grey_lit orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_grey_lit; +create table TARGET.indi_pub_doi_from_crossref as select * from SOURCE.indi_pub_doi_from_crossref orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats 
TARGET.indi_pub_doi_from_crossref; +create table TARGET.indi_pub_gold_oa as select * from SOURCE.indi_pub_gold_oa orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_gold_oa; +create table TARGET.indi_pub_has_abstract as select * from SOURCE.indi_pub_has_abstract orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_abstract; +create table TARGET.indi_pub_has_cc_licence as select * from SOURCE.indi_pub_has_cc_licence orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_cc_licence; +create table TARGET.indi_pub_has_cc_licence_url as select * from SOURCE.indi_pub_has_cc_licence_url orig where exists (select 1 from TARGET.result r where r.id=orig.id); +compute stats TARGET.indi_pub_has_cc_licence_url; + +create view TARGET.indi_software_avg_year_content_oa as select * from SOURCE.indi_software_avg_year_content_oa orig; +create view TARGET.indi_software_avg_year_context_oa as select * from SOURCE.indi_software_avg_year_context_oa orig; +create view TARGET.indi_software_avg_year_country_oa as select * from SOURCE.indi_software_avg_year_country_oa orig; + --denorm alter table TARGET.result rename to TARGET.res_tmp; From 1250ae197f1cdc7865335b7a6005a8f2f611ea96 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 8 Sep 2021 14:08:43 +0300 Subject: [PATCH 135/166] using new indicators for the definition of peerreviewed, gold, and green --- ....sql => step16-createIndicatorsTables.sql} | 0 .../graph/stats/oozie_app/scripts/step16.sql | 62 ------------------- .../scripts/step16_1-definitions.sql | 22 +++++++ .../dhp/oa/graph/stats/oozie_app/workflow.xml | 40 ++++++------ 4 files changed, 42 insertions(+), 82 deletions(-) rename dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/{step16_7-createIndicatorsTables.sql => step16-createIndicatorsTables.sql} (100%) delete mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql similarity index 100% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_7-createIndicatorsTables.sql rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql deleted file mode 100644 index 481fd9e8c..000000000 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16.sql +++ /dev/null @@ -1,62 +0,0 @@ ----------------------------------------------------- --- Shortcuts for various definitions in stats db --- ----------------------------------------------------- - --- Peer reviewed: --- Results that have been collected from Crossref -create table ${stats_db_name}.result_peerreviewed as 
-with peer_reviewed as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_sources rs on rs.id=r.id - join ${stats_db_name}.datasource d on d.id=rs.datasource - where d.name='Crossref') -select distinct peer_reviewed.id as id, true as peer_reviewed -from peer_reviewed -union all -select distinct r.id as id, false as peer_reviewed -from ${stats_db_name}.result r -left outer join peer_reviewed pr on pr.id=r.id -where pr.id is null; - --- Green OA: --- OA results that are hosted by an Institutional repository and have NOT been harvested from a DOAJ journal. -create table ${stats_db_name}.result_greenoa as -with result_green as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - left outer join ( - select rd.id from ${stats_db_name}.result_datasources rd - join ${stats_db_name}.datasource d on rd.datasource=d.id - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where sd.name='DOAJ-ARTICLES' - ) as doaj on doaj.id=r.id - where r.bestlicence in ('Open Access', 'Open Source') and d.type='Institutional Repository' and doaj.id is null) -select distinct result_green.id, true as green -from result_green -union all -select distinct r.id as id, false as green -from ${stats_db_name}.result r -left outer join result_green rg on rg.id=r.id -where rg.id is null; - --- GOLD OA: --- OA results that have been harvested from a DOAJ journal. -create table ${stats_db_name}.result_gold as -with result_gold as ( - select distinct r.id as id - from ${stats_db_name}.result r - join ${stats_db_name}.result_datasources rd on rd.id=r.id - join ${stats_db_name}.datasource d on d.id=rd.datasource - join ${stats_db_name}.datasource_sources sds on sds.id=d.id - join ${stats_db_name}.datasource sd on sd.id=sds.datasource - where r.type='publication' and r.bestlicence='Open Access' and sd.name='DOAJ-Articles') -select distinct result_gold.id, true as gold -from result_gold -union all -select distinct r.id, false as gold -from ${stats_db_name}.result r -where r.id not in (select id from result_gold); \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql new file mode 100644 index 000000000..484e0772c --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -0,0 +1,22 @@ +---------------------------------------------------- +-- Shortcuts for various definitions in stats db --- +---------------------------------------------------- + +-- Peer reviewed: +create table ${stats_db_name}.result_peerreviewed as +select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed +from result r +left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id +left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; + +-- Green OA: +create table ${stats_db_name}.result_greenoa as +select r.id, case when green.green_oa=1 then true else false end as green +from ${stats_db_name}.result r +left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; + +-- GOLD OA: +create table 
${stats_db_name}.result_gold as +select r.id, case when green.green_oa=1 then true else false end as green +from ${stats_db_name}.result r + left outer join ${stats_db_name}.indi_pub_gold_oa green on green.id=r.id; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index a329ca4bf..6d618e489 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -239,14 +239,27 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - +
- + + + ${jobTracker} + ${nameNode} + indicators.sh + ${stats_db_name} + ${wf:appPath()}/scripts/step16-createIndicatorsTables.sql + indicators.sh + + + + + + ${hive_jdbc_url} - + stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} @@ -272,24 +285,11 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - + - - - ${jobTracker} - ${nameNode} - indicators.sh - ${stats_db_name} - ${wf:appPath()}/scripts/step16_7-createIndicatorsTables.sql - indicators.sh - - - - - - + ${jobTracker} ${nameNode} @@ -298,11 +298,11 @@ ${stats_db_name} contexts.sh - + - + ${jobTracker} ${nameNode} From c6ada217a19a7b96754417e67df421b9a07987bd Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 8 Sep 2021 22:34:59 +0300 Subject: [PATCH 136/166] fixed typo --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh index fb944f4ff..93faa43d6 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/indicators.sh @@ -15,5 +15,5 @@ hdfs dfs -copyToLocal $SCRIPT_PATH echo "Creating indicators" impala-shell -q "invalidate metadata" impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/^\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -c -f - -cat step16_7-createIndicatorsTables.sql | impala-shell -d $TARGET -f - +cat step16-createIndicatorsTables.sql | impala-shell -d $TARGET -f - echo "Indicators created" \ No newline at end of file From f13cca7e83f2b3f7328f5a3e0c111eee0d9879e3 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 8 Sep 2021 23:07:58 +0300 Subject: [PATCH 137/166] moved dependencies of indicators before them... 
--- .../scripts/{step16_6.sql => step15_5.sql} | 0 .../dhp/oa/graph/stats/oozie_app/workflow.xml | 22 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) rename dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/{step16_6.sql => step15_5.sql} (100%) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql similarity index 100% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_6.sql rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step15_5.sql diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 6d618e489..5d18ad3e0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -239,6 +239,17 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} + + + + + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + @@ -274,17 +285,6 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - - - - - - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - From 43852bac0eb65769052b6a153c57940c8e3ed549 Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 13 Sep 2021 01:36:41 +0300 Subject: [PATCH 138/166] creating other::other concept for all contexts --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh index 6c5823b0c..6d42ab13d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/contexts.sh @@ -16,7 +16,7 @@ curl -L ${CONTEXT_API}/contexts/?type=ri,community -H "accept: application/json" cat contexts.csv | cut -d , -f1 | xargs -I {} curl -L ${CONTEXT_API}/context/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split(":")[0]),\(.id),\(.label)"' > categories.csv cat categories.csv | cut -d , -f2 | sed 's/:/%3A/g'| xargs -I {} curl -L ${CONTEXT_API}/context/category/{}/?all=true | /usr/local/sbin/jq -r '.[]|"\(.id|split("::")[0])::\(.id|split("::")[1]),\(.id),\(.label)"' > concepts.csv cat contexts.csv | sed 's/^\(.*\),\(.*\)/\1,\1::other,\2/' >> categories.csv -cat categories.csv | grep -v ::other | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv +cat categories.csv | sed 's/^.*,\(.*\),\(.*\)/\1,\1::other,\2/' >> concepts.csv echo "uploading context data to hdfs" hdfs dfs -mkdir ${TMP} From 461bf90ca6eec21e45866e0a89db2ed5728afeea Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 13 Sep 2021 11:10:30 +0300 Subject: [PATCH 139/166] fixed the gold_oa definition --- .../oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 484e0772c..6e2d9a262 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -17,6 +17,6 @@ left outer join ${stats_db_name}.indi_pub_green_oa green on green.id=r.id; -- GOLD OA: create table ${stats_db_name}.result_gold as -select r.id, case when green.green_oa=1 then true else false end as green +select r.id, case when gold.gold_oa=1 then true else false end as gold from ${stats_db_name}.result r - left outer join ${stats_db_name}.indi_pub_gold_oa green on green.id=r.id; \ No newline at end of file + left outer join ${stats_db_name}.indi_pub_gold_oa gold on gold.id=r.id; \ No newline at end of file From 8fc89ae82278ce169fbd95c568d0a5d9a003a709 Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 13 Sep 2021 14:33:23 +0300 Subject: [PATCH 140/166] moved context table creation before indicators --- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 5d18ad3e0..8fe05a933 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -250,6 +250,19 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} + + + + + + + ${jobTracker} + ${nameNode} + contexts.sh + ${context_api_url} + ${stats_db_name} + contexts.sh + @@ -285,19 +298,6 @@ stats_db_name=${stats_db_name} openaire_db_name=${openaire_db_name} - - - - - - - ${jobTracker} - ${nameNode} - contexts.sh - ${context_api_url} - ${stats_db_name} - contexts.sh - From 9b1936701c852860f225c88b663fc4634668e175 Mon Sep 17 00:00:00 2001 From: antleb Date: Mon, 13 Sep 2021 21:07:44 +0300 Subject: [PATCH 141/166] fixed yet another typo --- .../oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql index 6e2d9a262..6b4d9b1b0 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16_1-definitions.sql @@ -5,7 +5,7 @@ -- Peer reviewed: create table ${stats_db_name}.result_peerreviewed as select r.id as id, case when doi.doi_from_crossref=1 and grey.grey_lit=0 then true else false end as peer_reviewed -from result r +from ${stats_db_name}.result r left outer join ${stats_db_name}.indi_pub_doi_from_crossref doi on doi.id=r.id left outer join ${stats_db_name}.indi_pub_grey_lit grey on grey.id=r.id; 
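The gold_oa and peer-review fixes above only touch the stats definition layer: result_gold now exposes a gold flag computed from indi_pub_gold_oa (replacing the copy-pasted green alias), and result_peerreviewed reads ${stats_db_name}.result instead of the unqualified result table. A quick consistency check after rebuilding the database could look like the following sketch; it assumes impala-shell is reachable from the gateway and the database name is, again, only an example.

    # hypothetical sanity check: no result should be flagged gold without a matching indicator row
    STATS_DB=openaire_prod_stats   # assumed
    impala-shell -d "$STATS_DB" -q "
      select count(*) as violations
      from result_gold rg
      left outer join indi_pub_gold_oa i on i.id = rg.id
      where rg.gold = true and (i.gold_oa is null or i.gold_oa != 1)"
    # a correct rebuild should report violations = 0
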
From de9bf3a161931e0e7fa4baa7ae12f5c84e70de5f Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 14 Sep 2021 01:29:08 +0300 Subject: [PATCH 142/166] added cc_licences and abstracts in observatory db --- .../graph/stats/oozie_app/scripts/step10.sql | 5 + .../scripts/step21-createObservatoryDB.sql | 92 +++++++++++-------- 2 files changed, 60 insertions(+), 37 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql index 77fbd3b18..fc0162a9c 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step10.sql @@ -23,6 +23,11 @@ CREATE OR REPLACE VIEW ${stats_db_name}.rndexpediture AS SELECT * FROM ${external_stats_db_name}.rndexpediture; +CREATE OR REPLACE VIEW ${stats_db_name}.licenses_normalized AS +SELECT * +FROM ${external_stats_db_name}.licenses_normalized; + + ------------------------------------------------------------------------------------------------ ------------------------------------------------------------------------------------------------ -- Creation date of the database diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index 40cdf3f6d..f0e5a8dab 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,41 +1,44 @@ create table TARGET.result_affiliated_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; create table TARGET.result_affiliated_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year + case when pids.pid is not null then true else false end as pid, case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; create table TARGET.result_affiliated_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; create table TARGET.result_affiliated_datasource stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, d.name as dname + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -44,12 +47,13 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; create table TARGET.result_affiliated_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode 
in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -58,35 +62,38 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; create table TARGET.result_affiliated_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; create table TARGET.result_affiliated_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; create table TARGET.result_affiliated_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, p.funder as pfunder + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -95,12 +102,13 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; create table TARGET.result_affiliated_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -109,12 +117,13 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; create table TARGET.result_deposited_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -123,11 +132,12 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, 
cc_licence, abstract, c.code, c.name; create table TARGET.result_deposited_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, r.year + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -136,12 +146,13 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; create table TARGET.result_deposited_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -150,12 +161,13 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, r.year, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; create table TARGET.result_deposited_datasource stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -164,12 +176,13 
@@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; create table TARGET.result_deposited_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -178,11 +191,12 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, d.name, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; create table TARGET.result_deposited_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, r.type, o.name as oname + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -191,12 +205,13 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; create table TARGET.result_deposited_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -205,12 +220,13 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, o.name, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; create table TARGET.result_deposited_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -221,12 +237,13 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; create table TARGET.result_deposited_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -237,7 +254,8 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, p.funder, c.code, c.name; +left outer join SOURCE.licenses_normalized rln on rln.license=rl.type 
+group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; compute stats TARGET.result_affiliated_country; compute stats TARGET.result_affiliated_year; @@ -256,4 +274,4 @@ compute stats TARGET.result_deposited_datasource_country; compute stats TARGET.result_deposited_organization; compute stats TARGET.result_deposited_organization_country; compute stats TARGET.result_deposited_funder; -compute stats TARGET.result_deposited_funder_country; +compute stats TARGET.result_deposited_funder_country; \ No newline at end of file From aefa36c54bc2b56cd906170cc0d25f8dbf4f6e48 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 14 Sep 2021 17:26:15 +0200 Subject: [PATCH 143/166] other task executions go ahead if UnknownHostException happens on a single task --- .../orcid/SparkDownloadOrcidAuthors.java | 18 +++++++++++++++++- .../orcid/SparkDownloadOrcidWorks.java | 16 +++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 2b8e42bf6..8f0b3a094 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,8 +4,11 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -18,6 +21,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.util.LongAccumulator; @@ -78,6 +82,7 @@ public class SparkDownloadOrcidAuthors { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); logger.info("Retrieving data from lamda sequence file"); JavaPairRDD lamdaFileRDD = sc @@ -107,7 +112,17 @@ public class SparkDownloadOrcidAuthors { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -171,6 +186,7 @@ public class SparkDownloadOrcidAuthors { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); 
logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index cab538783..457c79adb 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,6 +3,8 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -96,6 +98,7 @@ public class SparkDownloadOrcidWorks { LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); + LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -154,7 +157,17 @@ public class SparkDownloadOrcidWorks { httpGet.addHeader("Accept", "application/vnd.orcid+xml"); httpGet.addHeader("Authorization", String.format("Bearer %s", token)); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = client.execute(httpGet); + CloseableHttpResponse response = null; + try { + response = client.execute(httpGet); + } catch (UnknownHostException u) { + downloaded.setStatusCode(-1); + unknowHostAcc.add(1); + if (client != null) { + client.close(); + } + return downloaded.toTuple2(); + } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { @@ -219,6 +232,7 @@ public class SparkDownloadOrcidWorks { logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); + logger.info("unknowHostAcc: {}", unknowHostAcc.value()); }); } From 8b804e7fe1d1602d33bfb388a4e3d99c40acd22c Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Tue, 14 Sep 2021 17:30:52 +0200 Subject: [PATCH 144/166] removed unused imports --- .../eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java | 3 --- .../eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java | 1 - 2 files changed, 4 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index 8f0b3a094..c0aa007e5 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,11 +4,9 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.net.InetAddress; import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.Date; -import 
java.util.List; import java.util.Optional; import org.apache.commons.io.IOUtils; @@ -21,7 +19,6 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.util.LongAccumulator; diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 457c79adb..63ba151f6 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,7 +3,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.net.InetAddress; import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; From ebf53a1616b256de90aa62255d5c4e2b13d34237 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:10:37 +0200 Subject: [PATCH 145/166] added cleaning for relation fields: subRelType & relClass according to dedicated vocabs --- .../dhp/oa/graph/clean/CleaningRuleMap.java | 31 ++++++++--- .../clean/GraphCleaningFunctionsTest.java | 55 ++++++++++++------- .../dnetlib/dhp/oa/graph/clean/relation.json | 10 ++++ .../dnetlib/dhp/oa/graph/clean/synonyms.txt | 12 +++- .../eu/dnetlib/dhp/oa/graph/clean/terms.txt | 39 ++++++++++++- pom.xml | 2 +- 6 files changed, 117 insertions(+), 32 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java index 95aa749b2..7a3583289 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/clean/CleaningRuleMap.java @@ -12,6 +12,7 @@ import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.AccessRight; import eu.dnetlib.dhp.schema.oaf.Country; import eu.dnetlib.dhp.schema.oaf.Qualifier; +import eu.dnetlib.dhp.schema.oaf.Relation; public class CleaningRuleMap extends HashMap, SerializableConsumer> implements Serializable { @@ -24,17 +25,31 @@ public class CleaningRuleMap extends HashMap, SerializableConsumer cleanQualifier(vocabularies, (Qualifier) o)); mapping.put(AccessRight.class, o -> cleanQualifier(vocabularies, (AccessRight) o)); - mapping.put(Country.class, o -> { - final Country c = (Country) o; - if (StringUtils.isBlank(c.getSchemeid())) { - c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); - c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); - } - cleanQualifier(vocabularies, c); - }); + mapping.put(Country.class, o -> cleanCountry(vocabularies, (Country) o)); + mapping.put(Relation.class, o -> cleanRelation(vocabularies, (Relation) o)); return mapping; } + private static void cleanRelation(VocabularyGroup vocabularies, Relation r) { + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_SUBRELTYPE)) { + Qualifier newValue = 
vocabularies.lookup(ModelConstants.DNET_RELATION_SUBRELTYPE, r.getSubRelType()); + r.setSubRelType(newValue.getClassid()); + } + if (vocabularies.vocabularyExists(ModelConstants.DNET_RELATION_RELCLASS)) { + Qualifier newValue = vocabularies.lookup(ModelConstants.DNET_RELATION_RELCLASS, r.getRelClass()); + r.setRelClass(newValue.getClassid()); + } + } + + private static void cleanCountry(VocabularyGroup vocabularies, Country o) { + final Country c = o; + if (StringUtils.isBlank(c.getSchemeid())) { + c.setSchemeid(ModelConstants.DNET_COUNTRY_TYPE); + c.setSchemename(ModelConstants.DNET_COUNTRY_TYPE); + } + cleanQualifier(vocabularies, c); + } + private static void cleanQualifier(VocabularyGroup vocabularies, Q q) { if (vocabularies.vocabularyExists(q.getSchemeid())) { Qualifier newValue = vocabularies.lookup(q.getSchemeid(), q.getClassid()); diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java index edcd72ab4..42d9f226c 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/clean/GraphCleaningFunctionsTest.java @@ -5,6 +5,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.lenient; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -16,12 +17,12 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions; import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -29,7 +30,8 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @ExtendWith(MockitoExtension.class) public class GraphCleaningFunctionsTest { - public static final ObjectMapper MAPPER = new ObjectMapper(); + public static final ObjectMapper MAPPER = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); @Mock private ISLookUpService isLookUpService; @@ -49,6 +51,23 @@ public class GraphCleaningFunctionsTest { mapping = CleaningRuleMap.create(vocabularies); } + @Test + void testCleanRelations() throws Exception { + + List lines = IOUtils + .readLines(getClass().getResourceAsStream("/eu/dnetlib/dhp/oa/graph/clean/relation.json")); + for (String json : lines) { + Relation r_in = MAPPER.readValue(json, Relation.class); + assertNotNull(r_in); + + assertFalse(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_in.getRelClass())); + + Relation r_out = OafCleaner.apply(r_in, mapping); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_RELCLASS).contains(r_out.getRelClass())); + assertTrue(vocabularies.getTerms(ModelConstants.DNET_RELATION_SUBRELTYPE).contains(r_out.getSubRelType())); + } + } + @Test void testCleaning() throws Exception { @@ -87,7 +106,7 @@ public class GraphCleaningFunctionsTest { p_out .getPid() .stream() - 
.map(p -> p.getQualifier()) + .map(StructuredProperty::getQualifier) .allMatch(q -> pidTerms.contains(q.getClassid()))); List poi = p_out.getInstance(); @@ -101,8 +120,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, poii.getPid().size()); assertTrue( - poii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(poii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(poii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(poii.getAlternateIdentifier()); assertEquals(2, poii.getAlternateIdentifier().size()); @@ -111,16 +130,12 @@ public class GraphCleaningFunctionsTest { poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1007/s109090161569x")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); assertTrue( poii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); Publication p_cleaned = GraphCleaningFunctions.cleanup(p_out); @@ -142,8 +157,8 @@ public class GraphCleaningFunctionsTest { assertEquals(2, pcii.getPid().size()); assertTrue( - pcii.getPid().stream().filter(s -> s.getValue().equals("10.1007/s109090161569x")).findFirst().isPresent()); - assertTrue(pcii.getPid().stream().filter(s -> s.getValue().equals("10.1008/abcd")).findFirst().isPresent()); + pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1007/s109090161569x"))); + assertTrue(pcii.getPid().stream().anyMatch(s -> s.getValue().equals("10.1008/abcd"))); assertNotNull(pcii.getAlternateIdentifier()); assertEquals(1, pcii.getAlternateIdentifier().size()); @@ -151,9 +166,7 @@ public class GraphCleaningFunctionsTest { pcii .getAlternateIdentifier() .stream() - .filter(s -> s.getValue().equals("10.1009/qwerty")) - .findFirst() - .isPresent()); + .anyMatch(s -> s.getValue().equals("10.1009/qwerty"))); getAuthorPids(p_cleaned).forEach(pid -> { System.out @@ -172,17 +185,17 @@ public class GraphCleaningFunctionsTest { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()) - .map(s -> s.getQualifier()); + .map(Author::getPid) + .flatMap(Collection::stream) + .map(StructuredProperty::getQualifier); } private Stream getAuthorPids(Result pub) { return pub .getAuthor() .stream() - .map(a -> a.getPid()) - .flatMap(p -> p.stream()); + .map(Author::getPid) + .flatMap(Collection::stream); } private List vocs() throws IOException { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json new file mode 100644 index 000000000..97764de00 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/relation.json @@ -0,0 +1,10 @@ +{"relType":"resultResult","subRelType":"citation","relClass":"cites","source":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","target":"50|openaire____::007a4870b31056f89b768cf508e1538e"} +{"relType":"resultResult","subRelType":"citation","relClass":"isCitedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} 
+{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"supplement","relClass":"isSupplementedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"isPartOf","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"part","relClass":"hasPart","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"isReviewedBy","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"review","relClass":"reviews","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"relationship","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} +{"relType":"resultResult","subRelType":"publicationDataset","relClass":"isRelatedTo","source":"50|openaire____::007a4870b31056f89b768cf508e1538e","target":"50|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556"} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt index 729296522..79dc7cd2d 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/synonyms.txt @@ -1231,4 +1231,14 @@ dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-学術雑誌論文(査 dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-紀要論文(査読有り) dnet:review_levels @=@ 0001 @=@ 印刷物/電子媒体-雑誌記事(査読有り) dnet:review_levels @=@ 0001 @=@ 原著論文(査読有り) -dnet:review_levels @=@ 0001 @=@ 査読論文 \ No newline at end of file +dnet:review_levels @=@ 0001 @=@ 査読論文 +dnet:relation_relClass @=@ Cites @=@ cites +dnet:relation_relClass @=@ IsCitedBy @=@ isCitedBy +dnet:relation_relClass @=@ HasPart @=@ hasPart +dnet:relation_relClass @=@ IsPartOf @=@ isPartOf +dnet:relation_relClass @=@ IsReviewedBy @=@ isReviewedBy +dnet:relation_relClass @=@ Reviews @=@ reviews +dnet:relation_relClass @=@ IsSupplementTo @=@ isSupplementTo +dnet:relation_relClass @=@ IsSupplementedBy @=@ isSupplementedBy +dnet:relation_relClass @=@ IsRelatedTo @=@ isRelatedTo +dnet:relation_subRelType @=@ relationship @=@ publicationDataset \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt index ba47aaf5c..bb1e5fbf9 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/clean/terms.txt @@ -1079,4 +1079,41 @@ dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/DATASET/IS_SUPPLEMENTED dnet:topic_types @=@ dnet:topic_types @=@ ENRICH/MISSING/AUTHOR/ORCID 
@=@ An Open Researcher and Contributor ID (ORCID) that can be associated to an author of your publications dnet:review_levels @=@ dnet:review_levels @=@ 0000 @=@ Unknown dnet:review_levels @=@ dnet:review_levels @=@ 0002 @=@ nonPeerReviewed -dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed \ No newline at end of file +dnet:review_levels @=@ dnet:review_levels @=@ 0001 @=@ peerReviewed +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Cites @=@ Cites +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCitedBy @=@ IsCitedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasPart @=@ HasPart +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPartOf @=@ IsPartOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsReviewedBy @=@ IsReviewedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Reviews @=@ Reviews +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementTo @=@ IsSupplementTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSupplementedBy @=@ IsSupplementedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsRelatedTo @=@ IsRelatedTo +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Compiles @=@ Compiles +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Continues @=@ Continues +dnet:relation_relClass @=@ dnet:relation_relClass @=@ Documents @=@ Documents +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasAmongTopNSimilarDocuments @=@ HasAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ HasVersion @=@ HasVersion +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsAmongTopNSimilarDocuments @=@ IsAmongTopNSimilarDocuments +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsCompiledBy @=@ IsCompiledBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsContinuedBy @=@ IsContinuedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDerivedFrom @=@ IsDerivedFrom +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsDocumentedBy @=@ IsDocumentedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsNewVersionOf @=@ IsNewVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsObsoletedBy @=@ IsObsoletedBy +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsOriginalFormOf @=@ IsOriginalFormOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsPreviousVersionOf @=@ IsPreviousVersionOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsSourceOf @=@ IsSourceOf +dnet:relation_relClass @=@ dnet:relation_relClass @=@ IsVariantFormOf @=@ IsVariantFormOf +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ affiliation @=@ affiliation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ citation @=@ citation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ dedup @=@ dedup +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ outcome @=@ outcome +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ part @=@ part +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ participation @=@ participation +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ provision @=@ provision +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ relationship @=@ relationship +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ review @=@ review +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ similarity @=@ similarity +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ supplement @=@ supplement +dnet:relation_subRelType @=@ dnet:relation_subRelType @=@ version @=@ version 
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 99525ef85..61b0ad873 100644 --- a/pom.xml +++ b/pom.xml @@ -753,7 +753,7 @@ 3.3.3 3.4.2 [2.12,3.0) - [2.7.17] + [2.7.18] [4.0.3] [6.0.5] [3.1.6] From 663b1556d7adc0b9f8a94f75d99c299e216b08a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:40:25 +0200 Subject: [PATCH 146/166] manually integrating PR#140 https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/140 --- .../main/java/eu/dnetlib/dhp/common/Constants.java | 6 ++++++ .../dhp/common/collection/HttpConnector2.java | 14 +++++++++++++- .../collection/plugin/oai/OaiCollectorPlugin.java | 4 ++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 8fab94e92..a62a0ac79 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -52,4 +52,10 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 724f5f0e1..dd46ab1f4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.common.Constants; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -112,6 +113,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 878e286e0..2d04b2574 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { } if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate); } final Iterator> iters = sets From e9ccdf853f128a144fb193546553fa1bdcd10399 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Wed, 15 Sep 2021 18:44:54 +0200 Subject: [PATCH 147/166] related to https://code-repo.d4science.org/D-Net/dnet-hadoop/issues/132 --- .../java/eu/dnetlib/dhp/PropagationConstant.java | 15 ++++++++------- .../SparkOrcidToResultFromSemRelJob.java | 5 +++-- ...SparkResultToCommunityFromOrganizationJob.java | 4 +++- .../SparkResultToCommunityThroughSemRelJob.java | 4 +++- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java index 0b4a80b2d..0d7c74475 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/PropagationConstant.java @@ -68,27 +68,28 @@ public class PropagationConstant { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_COUNTRY_INSTREPO_CLASS_ID, - PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME)); + PROPAGATION_COUNTRY_INSTREPO_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); return nc; } public static DataInfo getDataInfo( - String inference_provenance, String inference_class_id, String inference_class_name) { + String inference_provenance, String inference_class_id, String inference_class_name, String qualifierSchema) { DataInfo di = new DataInfo(); di.setInferred(true); di.setDeletedbyinference(false); di.setTrust("0.85"); di.setInferenceprovenance(inference_provenance); - di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name)); + di.setProvenanceaction(getQualifier(inference_class_id, inference_class_name, qualifierSchema)); return di; } - public static Qualifier getQualifier(String inference_class_id, String inference_class_name) { + public static Qualifier getQualifier(String inference_class_id, String inference_class_name, String qualifierSchema) { Qualifier pa = new Qualifier(); pa.setClassid(inference_class_id); pa.setClassname(inference_class_name); - pa.setSchemeid(ModelConstants.DNET_PID_TYPES); - pa.setSchemename(ModelConstants.DNET_PID_TYPES); + pa.setSchemeid(qualifierSchema); + pa.setSchemename(qualifierSchema); return pa; } @@ -107,7 +108,7 @@ public class PropagationConstant { r.setRelClass(rel_class); r.setRelType(rel_type); r.setSubRelType(subrel_type); - r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name)); + r.setDataInfo(getDataInfo(inference_provenance, inference_class_id, inference_class_name, ModelConstants.DNET_PROVENANCE_ACTIONS)); return r; } diff 
--git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index 40faef7f3..68949b900 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -173,13 +173,14 @@ public class SparkOrcidToResultFromSemRelJob { if (toaddpid) { StructuredProperty p = new StructuredProperty(); p.setValue(autoritative_author.getOrcid()); - p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME)); + p.setQualifier(getQualifier(ModelConstants.ORCID_PENDING, ModelConstants.ORCID_CLASSNAME, ModelConstants.DNET_PID_TYPES)); p .setDataInfo( getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_ID, - PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME)); + PROPAGATION_ORCID_TO_RESULT_FROM_SEM_REL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS)); Optional> authorPid = Optional.ofNullable(author.getPid()); if (authorPid.isPresent()) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index cb80a90ca..1289ff644 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -128,7 +129,8 @@ public class SparkResultToCommunityFromOrganizationJob { getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); propagatedContexts.add(newContext); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index 3690351fb..7f76ead94 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; import java.util.stream.Collectors; +import eu.dnetlib.dhp.schema.common.ModelConstants; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -124,7 +125,8 @@ public class SparkResultToCommunityThroughSemRelJob { 
getDataInfo( PROPAGATION_DATA_INFO_TYPE, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, - PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME))); + PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, + ModelConstants.DNET_PROVENANCE_ACTIONS))); return newContext; } return null; From dd2329849f0cf2455e34321f59e31c8d97235f7c Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 16 Sep 2021 13:50:34 +0300 Subject: [PATCH 148/166] fixed the definition of cc_licence --- .../scripts/step21-createObservatoryDB.sql | 167 ++++++++++++++---- 1 file changed, 131 insertions(+), 36 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index f0e5a8dab..7c344b903 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,44 +1,61 @@ create table TARGET.result_affiliated_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; create table TARGET.result_affiliated_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, + rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on 
pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; create table TARGET.result_affiliated_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; create table TARGET.result_affiliated_datasource stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, + rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -47,13 +64,18 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; create table TARGET.result_affiliated_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else 
false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -62,38 +84,54 @@ left outer join SOURCE.result_datasources rd on rd.id=r.id left outer join SOURCE.datasource d on d.id=rd.datasource left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; create table TARGET.result_affiliated_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; create table TARGET.result_affiliated_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left 
outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; create table TARGET.result_affiliated_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, + rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -102,13 +140,18 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; create table TARGET.result_affiliated_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -117,13 +160,18 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; create table TARGET.result_deposited_country stored as parquet as select 
count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -132,12 +180,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; create table TARGET.result_deposited_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, r.year + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, + rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -146,13 +200,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; create table TARGET.result_deposited_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as 
abstract, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -161,13 +220,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; create table TARGET.result_deposited_datasource stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -176,13 +240,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; create table TARGET.result_deposited_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -191,12 +260,18 @@ join SOURCE.organization o on o.id=dor.organization join 
SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; create table TARGET.result_deposited_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname + case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, + rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -205,13 +280,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; create table TARGET.result_deposited_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -220,13 +300,18 @@ join SOURCE.organization o on o.id=dor.organization join SOURCE.country c on c.code=o.country and c.continent_name='Europe' left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when 
lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; create table TARGET.result_deposited_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -237,13 +322,18 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; create table TARGET.result_deposited_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, case when lower(rln.normalized) like 'cc-%' then true else false end as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -254,7 +344,12 @@ join SOURCE.result_projects rp on rp.id=r.id join SOURCE.project p on p.id=rp.project left outer join SOURCE.result_licenses rl on rl.id=r.id left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join SOURCE.licenses_normalized rln on rln.license=rl.type +left outer join ( + select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count + from SOURCE.result_licenses rl + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + group by rl.id +) rln on rln.id=r.id group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; compute stats TARGET.result_affiliated_country; From 2943287d1005ebb004c9068d8b27ef046a935543 Mon Sep 17 00:00:00 2001 From: antleb Date: Thu, 16 Sep 2021 15:59:06 +0300 Subject: [PATCH 149/166] fixed the definition of 
cc_licence, part II --- .../scripts/step21-createObservatoryDB.sql | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index 7c344b903..d71978a30 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,7 +1,7 @@ create table TARGET.result_affiliated_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -19,7 +19,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year + coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -37,7 +37,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -55,7 +55,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_datasource stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname + coalesce(rln.count, 
0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -75,7 +75,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -95,7 +95,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -113,7 +113,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -131,7 +131,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_affiliated_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder + coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -151,7 +151,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table 
TARGET.result_affiliated_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_organization ro on ro.id=r.id join SOURCE.organization o on o.id=ro.organization @@ -171,7 +171,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -191,7 +191,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_year stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year + coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -211,7 +211,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_year_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -231,7 +231,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_datasource stored as parquet as 
select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -251,7 +251,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -271,7 +271,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_organization stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname + coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -291,7 +291,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_organization_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -311,7 +311,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table 
TARGET.result_deposited_funder stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') @@ -333,7 +333,7 @@ group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, create table TARGET.result_deposited_funder_country stored as parquet as select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, rln.count > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname + r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r join SOURCE.result_datasources rd on rd.id=r.id join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') From 8b681dcf1b99cd7fa0c1046abe52b3b35622bf83 Mon Sep 17 00:00:00 2001 From: antleb Date: Sat, 18 Sep 2021 00:35:14 +0300 Subject: [PATCH 150/166] attempt to make the observatory wf run in hive --- .../oa/graph/stats/oozie_app/observatory.sh | 4 +- .../scripts/step21-createObservatoryDB.sql | 840 +++++++++++------- 2 files changed, 527 insertions(+), 317 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh index ff03bca03..7db8d40a5 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh @@ -18,7 +18,9 @@ echo "Creating observatory database" impala-shell -q "drop database if exists ${TARGET} cascade" impala-shell -q "create database if not exists ${TARGET}" impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - -cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | impala-shell -f - +cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f - +impala-shell -q "invalidate metadata;" +impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - echo "Impala shell finished" echo "Updating shadow observatory database" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql 
b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index d71978a30..f17b5358f 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,372 +1,580 @@ -create table TARGET.result_affiliated_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname +create table TARGET.result_cc_licence stored as parquet as +select r.id, coalesce(rln.count, 0) > 0 as cc_licence from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( + left outer join ( select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + left outer join SOURCE.licenses_normalized rln on rl.type=rln.license group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; +) rln on rln.id=r.id; + +create table TARGET.result_affiliated_country stored as parquet as +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as ccode, c.name as cname +from SOURCE.result r + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; create table TARGET.result_affiliated_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open 
Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; create table TARGET.result_affiliated_year_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses 
rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; create table TARGET.result_affiliated_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_datasources rd on rd.id=r.id + left outer join SOURCE.datasource d on d.id=rd.datasource + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when 
r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; create table TARGET.result_affiliated_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_datasources rd on rd.id=r.id -left outer join SOURCE.datasource d on d.id=rd.datasource -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_datasources rd on rd.id=r.id + left outer join SOURCE.datasource d on d.id=rd.datasource + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; create table TARGET.result_affiliated_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when 
pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; create table TARGET.result_affiliated_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; + join 
SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; create table TARGET.result_affiliated_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; create table TARGET.result_affiliated_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, 
r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_organization ro on ro.id=r.id -join SOURCE.organization o on o.id=ro.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; + join SOURCE.result_organization ro on ro.id=r.id + join SOURCE.organization o on o.id=ro.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; create table TARGET.result_deposited_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + c.code as 
ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; create table TARGET.result_deposited_year stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join 
SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; create table TARGET.result_deposited_year_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, r.year, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + r.year, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, r.year, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left 
outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; create table TARGET.result_deposited_datasource stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; create table TARGET.result_deposited_datasource_country stored as parquet as -select count(distinct r.id) as total, r.green, 
r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, d.name as dname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + d.name as dname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, d.name, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; create table TARGET.result_deposited_organization stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, r.peer_reviewed, - coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else 
false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; create table TARGET.result_deposited_organization_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, o.name as oname, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + o.name as oname, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -left outer 
join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, o.name, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; create table TARGET.result_deposited_funder stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional 
Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; create table TARGET.result_deposited_funder_country stored as parquet as -select count(distinct r.id) as total, r.green, r.gold, case when rl.type is not null then true else false end as licence, - case when pids.pid is not null then true else false end as pid, case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, - r.peer_reviewed, coalesce(rln.count, 0) > 0 as cc_licence, r.abstract as abstract, r.type, p.funder as pfunder, c.code as ccode, c.name as cname +select + count(distinct r.id) as total, + r.green, + r.gold, + case when rl.type is not null then true else false end as licence, + case when pids.pid is not null then true else false end as pid, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end as oa, + r.peer_reviewed, + rln.cc_licence, + r.abstract as abstract, + r.authors > 1 as multiple_authors, + rpc.count > 1 as multiple_projects, + rfc.count > 1 as multiple_funders, + r.type, + p.funder as pfunder, c.code as ccode, c.name as cname from SOURCE.result r -join SOURCE.result_datasources rd on rd.id=r.id -join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') -join SOURCE.datasource_organizations dor on dor.id=d.id -join SOURCE.organization o on o.id=dor.organization -join SOURCE.country c on c.code=o.country and c.continent_name='Europe' -join SOURCE.result_projects rp on rp.id=r.id -join SOURCE.project p on p.id=rp.project -left outer join SOURCE.result_licenses rl on rl.id=r.id -left outer join SOURCE.result_pids pids on pids.id=r.id -left outer join ( - select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license - group by rl.id -) rln on rln.id=r.id -group by r.green, r.gold, licence, pid, oa, r.peer_reviewed, r.type, cc_licence, abstract, p.funder, c.code, c.name; + join SOURCE.result_datasources rd on rd.id=r.id + join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join SOURCE.datasource_organizations dor on dor.id=d.id + join SOURCE.organization o on o.id=dor.organization + join SOURCE.country c on c.code=o.country and c.continent_name='Europe' + join SOURCE.result_projects rp on rp.id=r.id + join SOURCE.project p on p.id=rp.project + left outer join SOURCE.result_licenses rl on rl.id=r.id + left outer join 
SOURCE.result_pids pids on pids.id=r.id + left outer join SOURCE.result_cc_licence rln on rln.id=r.id + left outer join SOURCE.result_projectcount rpc on rpc.id=r.id + left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, + case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -compute stats TARGET.result_affiliated_country; -compute stats TARGET.result_affiliated_year; -compute stats TARGET.result_affiliated_year_country; -compute stats TARGET.result_affiliated_datasource; -compute stats TARGET.result_affiliated_datasource_country; -compute stats TARGET.result_affiliated_organization; -compute stats TARGET.result_affiliated_organization_country; -compute stats TARGET.result_affiliated_funder; -compute stats TARGET.result_affiliated_funder_country; -compute stats TARGET.result_deposited_country; -compute stats TARGET.result_deposited_year; -compute stats TARGET.result_deposited_year_country; -compute stats TARGET.result_deposited_datasource; -compute stats TARGET.result_deposited_datasource_country; -compute stats TARGET.result_deposited_organization; -compute stats TARGET.result_deposited_organization_country; -compute stats TARGET.result_deposited_funder; -compute stats TARGET.result_deposited_funder_country; \ No newline at end of file +-- compute stats TARGET.result_affiliated_country; +-- compute stats TARGET.result_affiliated_year; +-- compute stats TARGET.result_affiliated_year_country; +-- compute stats TARGET.result_affiliated_datasource; +-- compute stats TARGET.result_affiliated_datasource_country; +-- compute stats TARGET.result_affiliated_organization; +-- compute stats TARGET.result_affiliated_organization_country; +-- compute stats TARGET.result_affiliated_funder; +-- compute stats TARGET.result_affiliated_funder_country; +-- compute stats TARGET.result_deposited_country; +-- compute stats TARGET.result_deposited_year; +-- compute stats TARGET.result_deposited_year_country; +-- compute stats TARGET.result_deposited_datasource; +-- compute stats TARGET.result_deposited_datasource_country; +-- compute stats TARGET.result_deposited_organization; +-- compute stats TARGET.result_deposited_organization_country; +-- compute stats TARGET.result_deposited_funder; +-- compute stats TARGET.result_deposited_funder_country; \ No newline at end of file From 853333bdde97db3d7ddc4c82fbe6ab5fa486ce83 Mon Sep 17 00:00:00 2001 From: miconis Date: Mon, 20 Sep 2021 16:21:47 +0200 Subject: [PATCH 151/166] implementation of the whitelist for similarity relations --- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 151 +++ .../dhp/oa/dedup/scan/oozie_app/workflow.xml | 32 + .../oa/dedup/whitelistSimRels_parameters.json | 38 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 1181 +++++++++-------- .../dnetlib/dhp/dedup/whitelist.simrels.txt | 2 + 5 files changed, 856 insertions(+), 548 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt diff --git 
a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java new file mode 100644 index 000000000..fa7d33570 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -0,0 +1,151 @@ +package eu.dnetlib.dhp.oa.dedup; + +import java.io.IOException; +import java.util.Optional; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.PairFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.dom4j.DocumentException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.SAXException; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.oa.dedup.model.Block; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.ISLookupClientFactory; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.MapDocument; +import eu.dnetlib.pace.util.MapDocumentUtil; +import scala.Tuple2; +import scala.Tuple3; + +public class SparkWhitelistSimRels extends AbstractSparkAction { + + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + + private static final String WHITELIST_SEPARATOR = "####"; + + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); + + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } + + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { + + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); + + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); + + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + //file format: source####target + Dataset> whiteListRels = 
spark.createDataset(sc + .textFile(whiteListPath) + //check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); + + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + + Dataset> entities = spark.createDataset(sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + }) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + + Dataset whiteListSimRels = whiteListRels2 + .map((MapFunction, Relation>) + r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class) + ); + + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } + + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); + + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index 342d83e8e..02fdd8431 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -20,6 +20,10 @@ workingPath path for the working directory + + whiteListPath + path for the whitelist of similarity relations + dedupGraphPath path for the output graph @@ -130,6 +134,34 @@ --workingPath${workingPath} --numPartitions8000 + + + + + + + yarn + cluster + Add Whitelist Similarity Relations + eu.dnetlib.dhp.oa.dedup.SparkWhitelistSimRels + dhp-dedup-openaire-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.shuffle.partitions=3840 + + --graphBasePath${graphBasePath} + --isLookUpUrl${isLookUpUrl} + --actionSetId${actionSetId} + --workingPath${workingPath} + --whiteListPath${whiteListPath} + --numPartitions8000 + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json new file mode 100644 index 000000000..0a5cad7c4 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json @@ -0,0 +1,38 @@ +[ + { + "paramName": "la", + "paramLongName": "isLookUpUrl", + "paramDescription": "address for the LookUp", + "paramRequired": true + }, + { + "paramName": "asi", + "paramLongName": "actionSetId", + "paramDescription": "action set identifier (name of the orchestrator)", + "paramRequired": true + }, + { + "paramName": "i", + "paramLongName": "graphBasePath", + "paramDescription": "the base path of the raw graph", + "paramRequired": true + }, + { + "paramName": "w", + "paramLongName": "workingPath", + "paramDescription": "path of the working directory", + "paramRequired": true + }, + { + "paramName": "np", + "paramLongName": "numPartitions", + "paramDescription": "number of partitions for the similarity relations intermediate phases", + "paramRequired": false + }, + { + "paramName": "wl", + "paramLongName": "whiteListPath", + "paramDescription": "whitelist file path for the addition of custom simrels", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 2f992bd78..fa03f93a6 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -5,13 +5,16 @@ import static java.nio.file.Files.createTempDirectory; import static org.apache.spark.sql.functions.count; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.lenient; import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.io.Serializable; import java.net.URISyntaxException; import java.nio.file.Paths; +import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; @@ -45,552 +48,634 @@ import scala.Tuple2; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SparkDedupTest implements Serializable { - @Mock(serializable = true) - ISLookUpService isLookUpService; - - private static SparkSession spark; - private static JavaSparkContext jsc; - - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; - private static final String testActionSetId = "test-orchestrator"; - - @BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { - - testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() 
+ "-") - .toAbsolutePath() - .toString(); - - testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - - final SparkConf conf = new SparkConf(); - conf.set("spark.sql.shuffle.partitions", "200"); - spark = SparkSession - .builder() - .appName(SparkDedupTest.class.getSimpleName()) - .master("local[*]") - .config(conf) - .getOrCreate(); - - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - } - - @BeforeEach - public void setUp() throws IOException, ISLookUpException { - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); - } - - @Test - @Order(1) - void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(336, sw_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - } - - @Test - @Order(2) - void 
cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath, - "-cc", - "3" - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - assertEquals(0, orgs_mergerel); - assertEquals(0, pubs_mergerel); - assertEquals(0, sw_mergerel); - assertEquals(0, ds_mergerel); - assertEquals(0, orp_mergerel); - - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); - FileUtils - .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); - } - - @Test - @Order(3) - void createMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + 
"/organization_mergerel") - .count(); - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .count(); - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .count(); - - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); - - } - - @Test - @Order(4) - void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - long orgs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") - .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); - long sw_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") - .count(); - long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); - long orp_deduprecord = jsc - .textFile( - testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") - .count(); - - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); - assertEquals(51, sw_deduprecord); - assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); - } - - @Test - @Order(5) - void updateEntityTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); - long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); - long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); - long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); - long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); - long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); - - long mergedOrgs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedPubs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - 
.where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedSw = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedDs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedOrp = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - assertEquals(896, publications); - assertEquals(838, organizations); - assertEquals(100, projects); - assertEquals(100, datasource); - assertEquals(200, softwares); - assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); - - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); - - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) - .count(); - - long deletedSw = jsc - .textFile(testDedupGraphBasePath + "/software") - .filter(this::isDeletedByInference) - .count(); - - long deletedDs = jsc - .textFile(testDedupGraphBasePath + "/dataset") - .filter(this::isDeletedByInference) - .count(); - - long deletedOrp = jsc - .textFile(testDedupGraphBasePath + "/otherresearchproduct") - .filter(this::isDeletedByInference) - .count(); - - assertEquals(mergedOrgs, deletedOrgs); - assertEquals(mergedPubs, deletedPubs); - assertEquals(mergedSw, deletedSw); - assertEquals(mergedDs, deletedDs); - assertEquals(mergedOrp, deletedOrp); - } - - @Test - @Order(6) - void propagateRelationTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); - parser - .parseArgument( - new String[] { - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkPropagateRelation(parser, spark).run(isLookUpService); - - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4860, relations); - - // check deletedbyinference - final Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) - .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); - - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); - - assertEquals(updated, deletedbyinference); - } - - @Test - @Order(7) - void testRelations() throws Exception { - 
testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); - } - - private void testUniqueness(String path, int expected_total, int expected_unique) { - Dataset rel = spark - .read() - .textFile(getClass().getResource(path).getPath()) - .map( - (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), - Encoders.bean(Relation.class)); - - assertEquals(expected_total, rel.count()); - assertEquals(expected_unique, rel.distinct().count()); - } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - return s.contains("\"deletedbyinference\":true"); - } + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + + lenient() + 
.when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(336, sw_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + } + + @Test + @Order(2) + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + //entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + //entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + //check if the first relation in the whitelist exists + assertTrue(sw_simrel + .as(Encoders.bean(Relation.class)) + 
.toJavaRDD() + .filter(rel -> + rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])).count() > 0); + //check if the second relation in the whitelist exists + assertTrue(sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter(rel -> + rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])).count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-cc", + "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, orgs_mergerel); + assertEquals(0, pubs_mergerel); + assertEquals(0, sw_mergerel); + assertEquals(0, ds_mergerel); + assertEquals(0, orp_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); + FileUtils + .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + 
"/otherresearchproduct_mergerel")); + } + + @Test + @Order(4) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + assertEquals(1272, orgs_mergerel); + assertEquals(1438, pubs_mergerel); + assertEquals(286, sw_mergerel); + assertEquals(472, ds_mergerel); + assertEquals(718, orp_mergerel); + + } + + @Test + @Order(5) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateDedupRecord.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + long orgs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") + .count(); + long pubs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .count(); + long sw_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") + .count(); + long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); + long orp_deduprecord = jsc + .textFile( + testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") + .count(); + + assertEquals(85, orgs_deduprecord); + assertEquals(65, pubs_deduprecord); + assertEquals(49, sw_deduprecord); + assertEquals(97, ds_deduprecord); + assertEquals(89, orp_deduprecord); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateEntity.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); + long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); + long softwares = 
jsc.textFile(testDedupGraphBasePath + "/software").count(); + long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); + long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); + + long mergedOrgs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedSw = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedDs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedOrp = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(896, publications); + assertEquals(838, organizations); + assertEquals(100, projects); + assertEquals(100, datasource); + assertEquals(198, softwares); + assertEquals(389, dataset); + assertEquals(517, otherresearchproduct); + + long deletedOrgs = jsc + .textFile(testDedupGraphBasePath + "/organization") + .filter(this::isDeletedByInference) + .count(); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + long deletedSw = jsc + .textFile(testDedupGraphBasePath + "/software") + .filter(this::isDeletedByInference) + .count(); + + long deletedDs = jsc + .textFile(testDedupGraphBasePath + "/dataset") + .filter(this::isDeletedByInference) + .count(); + + long deletedOrp = jsc + .textFile(testDedupGraphBasePath + "/otherresearchproduct") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedPubs, deletedPubs); + assertEquals(mergedSw, deletedSw); + assertEquals(mergedDs, deletedDs); + assertEquals(mergedOrp, deletedOrp); + } + + @Test + @Order(7) + void propagateRelationTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkPropagateRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + parser + .parseArgument( + new String[]{ + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkPropagateRelation(parser, spark).run(isLookUpService); + + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + + assertEquals(4860, relations); + + // check deletedbyinference + final Dataset mergeRels = spark + .read() + .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) + .as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = mergeRels + .where("relClass == 'merges'") + .select(mergeRels.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new 
Tuple2(r.getString(0), "d")); + + JavaRDD toCheck = jsc + .textFile(testDedupGraphBasePath + "/relation") + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()) + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()); + + long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); + long updated = toCheck.count(); + + assertEquals(updated, deletedbyinference); + } + + @Test + @Order(8) + void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int expected_total, int expected_unique) { + Dataset rel = spark + .read() + .textFile(getClass().getResource(path).getPath()) + .map( + (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), + Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt new file mode 100644 index 000000000..862ca466d --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/whitelist.simrels.txt @@ -0,0 +1,2 @@ +50|r37b0ad08687::f645b9729d1e1025a72c57883f0f2cac####50|r37b0ad08687::4c55b436743b5c49fa32cd582fd9e1aa +50|datacite____::a90f49f9fde5393c00633bea6e4e374a####50|datacite____::5f55cdee77303ba8a2bf9996c32a330c \ No newline at end of file From 92a63f78fe29bad5162af1a7964710a8d721ccf7 Mon Sep 17 00:00:00 2001 From: Enrico Ottonello Date: Mon, 20 Sep 2021 18:25:00 +0200 Subject: [PATCH 152/166] multiple download attempts handling if a connection to orcid server fails --- .../orcid/SparkDownloadOrcidAuthors.java | 103 +++---- .../orcid/SparkDownloadOrcidWorks.java | 97 +++---- .../doiboost/orcid/util/DownloadsReport.java | 10 + .../util/MultiAttemptsHttpConnector.java | 272 ++++++++++++++++++ .../doiboost/orcid/OrcidClientTest.java | 92 +++++- 5 files changed, 439 insertions(+), 135 deletions(-) create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java create mode 100644 dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java index c0aa007e5..c549beb03 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidAuthors.java @@ -4,7 +4,6 @@ package eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.FileNotFoundException; -import java.net.UnknownHostException; import 
java.text.SimpleDateFormat; import java.util.Date; import java.util.Optional; @@ -13,10 +12,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -26,8 +21,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import scala.Tuple2; public class SparkDownloadOrcidAuthors { @@ -73,17 +72,12 @@ public class SparkDownloadOrcidAuthors { LongAccumulator parsedRecordsAcc = spark.sparkContext().longAccumulator("parsed_records"); LongAccumulator modifiedRecordsAcc = spark.sparkContext().longAccumulator("to_download_records"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); - LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); - logger.info("Retrieving data from lamda sequence file"); + String lambdaFilePath = workingPath + lambdaFileName; + logger.info("Retrieving data from lamda sequence file: " + lambdaFilePath); JavaPairRDD lamdaFileRDD = sc - .sequenceFile(workingPath + lambdaFileName, Text.class, Text.class); + .sequenceFile(lambdaFilePath, Text.class, Text.class); final long lamdaFileRDDCount = lamdaFileRDD.count(); logger.info("Data retrieved: {}", lamdaFileRDDCount); @@ -104,57 +98,44 @@ public class SparkDownloadOrcidAuthors { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastModifiedDate); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + orcidId + "/record"); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl 
= "https://api.orcid.org/v3.0/" + orcidId + "/record"; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = null; + boolean downloadCompleted = false; + String record = ""; try { - response = client.execute(httpGet); - } catch (UnknownHostException u) { - downloaded.setStatusCode(-1); - unknowHostAcc.add(1); - if (client != null) { - client.close(); + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); } - return downloaded.toTuple2(); + errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - break; - case 404: - errorHTTP404Acc.add(1); - break; - case 409: - errorHTTP409Acc.add(1); - break; - case 503: - errorHTTP503Acc.add(1); - break; - case 525: - errorHTTP525Acc.add(1); - break; - default: - errorHTTPGenericAcc.add(1); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return downloaded.toTuple2(); }; @@ -165,27 +146,17 @@ public class SparkDownloadOrcidAuthors { long authorsModifiedCount = authorsModifiedRDD.count(); logger.info("Authors modified count: {}", authorsModifiedCount); - logger.info("Start downloading ..."); - final JavaPairRDD pairRDD = authorsModifiedRDD .repartition(100) .map(downloadRecordFn) .mapToPair(t -> new Tuple2<>(new Text(t._1()), new Text(t._2()))); - saveAsSequenceFile(workingPath, outputPath, sc, pairRDD); logger.info("parsedRecordsAcc: {}", parsedRecordsAcc.value()); logger.info("modifiedRecordsAcc: {}", modifiedRecordsAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); - logger.info("errorHTTP404Acc: {}", errorHTTP404Acc.value()); - logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); - logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); - logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); - logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); - logger.info("unknowHostAcc: {}", unknowHostAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); - } private static void saveAsSequenceFile(String workingPath, String outputPath, JavaSparkContext sc, diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java index 63ba151f6..0569bacfd 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/SparkDownloadOrcidWorks.java @@ -3,7 +3,6 @@ package 
eu.dnetlib.doiboost.orcid; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; -import java.net.UnknownHostException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.*; @@ -12,10 +11,6 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -29,8 +24,12 @@ import com.google.gson.JsonElement; import com.google.gson.JsonParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.doiboost.orcid.model.DownloadedRecordData; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; import eu.dnetlib.doiboost.orcid.util.HDFSUtil; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import eu.dnetlib.doiboost.orcid.xml.XMLRecordParser; import scala.Tuple2; @@ -91,13 +90,7 @@ public class SparkDownloadOrcidWorks { .sparkContext() .longAccumulator("error_parsing_xml_found"); LongAccumulator downloadedRecordsAcc = spark.sparkContext().longAccumulator("downloaded_records"); - LongAccumulator errorHTTP403Acc = spark.sparkContext().longAccumulator("error_HTTP_403"); - LongAccumulator errorHTTP404Acc = spark.sparkContext().longAccumulator("error_HTTP_404"); - LongAccumulator errorHTTP409Acc = spark.sparkContext().longAccumulator("error_HTTP_409"); - LongAccumulator errorHTTP503Acc = spark.sparkContext().longAccumulator("error_HTTP_503"); - LongAccumulator errorHTTP525Acc = spark.sparkContext().longAccumulator("error_HTTP_525"); - LongAccumulator errorHTTPGenericAcc = spark.sparkContext().longAccumulator("error_HTTP_Generic"); - LongAccumulator unknowHostAcc = spark.sparkContext().longAccumulator("error_unknowHost"); + LongAccumulator errorsAcc = spark.sparkContext().longAccumulator("errors"); JavaPairRDD updatedAuthorsRDD = sc .sequenceFile(workingPath + "downloads/updated_authors/*", Text.class, Text.class); @@ -151,61 +144,44 @@ public class SparkDownloadOrcidWorks { final DownloadedRecordData downloaded = new DownloadedRecordData(); downloaded.setOrcidId(orcidId); downloaded.setLastModifiedDate(lastUpdateValue); - CloseableHttpClient client = HttpClients.createDefault(); - HttpGet httpGet = new HttpGet("https://api.orcid.org/v3.0/" + relativeWorkUrl); - httpGet.addHeader("Accept", "application/vnd.orcid+xml"); - httpGet.addHeader("Authorization", String.format("Bearer %s", token)); + final HttpClientParams clientParams = new HttpClientParams(); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken(token); + String apiUrl = "https://api.orcid.org/v3.0/" + relativeWorkUrl; + DownloadsReport report = new DownloadsReport(); long startReq = System.currentTimeMillis(); - CloseableHttpResponse response = null; + boolean downloadCompleted = false; + String record = ""; try { - response = client.execute(httpGet); - } catch 
(UnknownHostException u) { - downloaded.setStatusCode(-1); - unknowHostAcc.add(1); - if (client != null) { - client.close(); + record = httpConnector.getInputSource(apiUrl, report); + downloadCompleted = true; + } catch (CollectorException ce) { + if (!report.isEmpty()) { + int errCode = report.keySet().stream().findFirst().get(); + report.forEach((k, v) -> { + logger.error(k + " " + v); + }); + downloaded.setStatusCode(errCode); + } else { + downloaded.setStatusCode(-4); } - return downloaded.toTuple2(); + errorsAcc.add(1); } long endReq = System.currentTimeMillis(); long reqTime = endReq - startReq; if (reqTime < 1000) { Thread.sleep(1000 - reqTime); } - int statusCode = response.getStatusLine().getStatusCode(); - downloaded.setStatusCode(statusCode); - if (statusCode != 200) { - switch (statusCode) { - case 403: - errorHTTP403Acc.add(1); - break; - case 404: - errorHTTP404Acc.add(1); - break; - case 409: - errorHTTP409Acc.add(1); - break; - case 503: - errorHTTP503Acc.add(1); - break; - case 525: - errorHTTP525Acc.add(1); - break; - default: - errorHTTPGenericAcc.add(1); - logger - .info( - "Downloading {} status code: {}", orcidId, - response.getStatusLine().getStatusCode()); - } - return downloaded.toTuple2(); + if (downloadCompleted) { + downloaded.setStatusCode(200); + downloadedRecordsAcc.add(1); + downloaded + .setCompressedData( + ArgumentApplicationParser + .compressArgument(record)); } - downloadedRecordsAcc.add(1); - downloaded - .setCompressedData( - ArgumentApplicationParser - .compressArgument(IOUtils.toString(response.getEntity().getContent()))); - client.close(); return downloaded.toTuple2(); }; @@ -226,12 +202,7 @@ public class SparkDownloadOrcidWorks { logger.info("errorLoadingXMLFoundAcc: {}", errorLoadingXMLFoundAcc.value()); logger.info("errorParsingXMLFoundAcc: {}", errorParsingXMLFoundAcc.value()); logger.info("downloadedRecordsAcc: {}", downloadedRecordsAcc.value()); - logger.info("errorHTTP403Acc: {}", errorHTTP403Acc.value()); - logger.info("errorHTTP409Acc: {}", errorHTTP409Acc.value()); - logger.info("errorHTTP503Acc: {}", errorHTTP503Acc.value()); - logger.info("errorHTTP525Acc: {}", errorHTTP525Acc.value()); - logger.info("errorHTTPGenericAcc: {}", errorHTTPGenericAcc.value()); - logger.info("unknowHostAcc: {}", unknowHostAcc.value()); + logger.info("errorsAcc: {}", errorsAcc.value()); }); } diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java new file mode 100644 index 000000000..b06b0af90 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/DownloadsReport.java @@ -0,0 +1,10 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import java.util.LinkedHashMap; + +public class DownloadsReport extends LinkedHashMap { + + public DownloadsReport() { + } +} diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java new file mode 100644 index 000000000..5ef6efa26 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/orcid/util/MultiAttemptsHttpConnector.java @@ -0,0 +1,272 @@ + +package eu.dnetlib.doiboost.orcid.util; + +import static eu.dnetlib.dhp.utils.DHPUtils.MAPPER; + +import java.io.IOException; +import java.io.InputStream; +import java.net.*; +import java.util.List; +import java.util.Map; 
+ +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.http.HttpHeaders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +/** + * Derived from eu.dnetlib.dhp.common.collection.HttpConnector2 with custom report and Bearer auth + * + * @author enrico + */ +public class MultiAttemptsHttpConnector { + + private static final Logger log = LoggerFactory.getLogger(MultiAttemptsHttpConnector.class); + + private HttpClientParams clientParams; + + private String responseType = null; + + private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; + + private String authToken = ""; + private String acceptHeaderValue = ""; + private String authMethod = ""; + public final static String BEARER = "BEARER"; + + public MultiAttemptsHttpConnector() { + this(new HttpClientParams()); + } + + public MultiAttemptsHttpConnector(HttpClientParams clientParams) { + this.clientParams = clientParams; + CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); + } + + /** + * Given the URL returns the content via HTTP GET + * + * @param requestUrl the URL + * @param report the list of errors + * @return the content of the downloaded resource + * @throws CollectorException when retrying more than maxNumberOfRetry times + */ + public String getInputSource(final String requestUrl, DownloadsReport report) + throws CollectorException { + return attemptDownloadAsString(requestUrl, 1, report); + } + + private String attemptDownloadAsString(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException { + + try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) { + return IOUtils.toString(s); + } catch (IOException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private InputStream attemptDownload(final String requestUrl, final int retryNumber, + final DownloadsReport report) throws CollectorException, IOException { + + if (retryNumber > getClientParams().getMaxNumberOfRetry()) { + final String msg = String + .format( + "Max number of retries (%s/%s) exceeded, failing.", + retryNumber, getClientParams().getMaxNumberOfRetry()); + log.error(msg); + throw new CollectorException(msg); + } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + + InputStream input = null; + + try { + if (getClientParams().getRequestDelay() > 0) { + backoffAndSleep(getClientParams().getRequestDelay()); + } + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); + urlConn.setInstanceFollowRedirects(false); + urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); + urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000); + urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent); + + if (!getAcceptHeaderValue().isEmpty()) { + urlConn.addRequestProperty(HttpHeaders.ACCEPT, getAcceptHeaderValue()); + } + if (!getAuthToken().isEmpty() && getAuthMethod().equals(BEARER)) { + urlConn.addRequestProperty(HttpHeaders.AUTHORIZATION, String.format("Bearer %s", getAuthToken())); + } + + if (log.isDebugEnabled()) { + logHeaderFields(urlConn); + } + + int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + if (is2xx(urlConn.getResponseCode())) { + input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + return input; + 
} + if (is3xx(urlConn.getResponseCode())) { + // REDIRECTS + final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); + log.info("The requested url has been moved to {}", newUrl); + report + .put( + urlConn.getResponseCode(), + String.format("Moved to: %s", newUrl)); + urlConn.disconnect(); + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } + return attemptDownload(newUrl, retryNumber + 1, report); + } + if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) { + switch (urlConn.getResponseCode()) { + case HttpURLConnection.HTTP_NOT_FOUND: + case HttpURLConnection.HTTP_BAD_GATEWAY: + case HttpURLConnection.HTTP_UNAVAILABLE: + case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: + if (retryAfter > 0) { + log + .warn( + "{} - waiting and repeating request after suggested retry-after {} sec.", + requestUrl, retryAfter); + backoffAndSleep(retryAfter * 1000); + } else { + log + .warn( + "{} - waiting and repeating request after default delay of {} sec.", + requestUrl, getClientParams().getRetryDelay()); + backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + } + report.put(urlConn.getResponseCode(), requestUrl); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + default: + report + .put( + urlConn.getResponseCode(), + String + .format( + "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + throw new CollectorException(urlConn.getResponseCode() + " error " + report); + } + } + throw new CollectorException( + String + .format( + "Unexpected status code: %s errors: %s", urlConn.getResponseCode(), + MAPPER.writeValueAsString(report))); + } catch (MalformedURLException | UnknownHostException e) { + log.error(e.getMessage(), e); + report.put(-2, e.getMessage()); + throw new CollectorException(e.getMessage(), e); + } catch (SocketTimeoutException | SocketException e) { + log.error(e.getMessage(), e); + report.put(-3, e.getMessage()); + backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); + return attemptDownload(requestUrl, retryNumber + 1, report); + } + } + + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { + log.debug("StatusCode: {}", urlConn.getResponseMessage()); + + for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { + if (e.getKey() != null) { + for (String v : e.getValue()) { + log.debug(" key: {} - value: {}", e.getKey(), v); + } + } + } + } + + private void backoffAndSleep(int sleepTimeMs) throws CollectorException { + log.info("I'm going to sleep for {}ms", sleepTimeMs); + try { + Thread.sleep(sleepTimeMs); + } catch (InterruptedException e) { + log.error(e.getMessage(), e); + throw new CollectorException(e); + } + } + + private int obtainRetryAfter(final Map> headerMap) { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) + && NumberUtils.isCreatable(headerMap.get(key).get(0))) { + return Integer.parseInt(headerMap.get(key).get(0)) + 10; + } + } + return -1; + } + + private String obtainNewLocation(final Map> headerMap) throws CollectorException { + for (String key : headerMap.keySet()) { + if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) { + return headerMap.get(key).get(0); + } + } + throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING"); + } + + private boolean is2xx(final int statusCode) { + return statusCode >= 200 && statusCode <= 299; + } + 
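+ // Companion range checks to is2xx above: 3xx triggers the redirect handling and 4xx/5xx the retry-or-fail paths in attemptDownload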
+ private boolean is4xx(final int statusCode) { + return statusCode >= 400 && statusCode <= 499; + } + + private boolean is3xx(final int statusCode) { + return statusCode >= 300 && statusCode <= 399; + } + + private boolean is5xx(final int statusCode) { + return statusCode >= 500 && statusCode <= 599; + } + + public String getResponseType() { + return responseType; + } + + public HttpClientParams getClientParams() { + return clientParams; + } + + public void setClientParams(HttpClientParams clientParams) { + this.clientParams = clientParams; + } + + public void setAuthToken(String authToken) { + this.authToken = authToken; + } + + private String getAuthToken() { + return authToken; + } + + public String getAcceptHeaderValue() { + return acceptHeaderValue; + } + + public void setAcceptHeaderValue(String acceptHeaderValue) { + this.acceptHeaderValue = acceptHeaderValue; + } + + public String getAuthMethod() { + return authMethod; + } + + public void setAuthMethod(String authMethod) { + this.authMethod = authMethod; + } +} diff --git a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java index 9ea7c6959..d4079058e 100644 --- a/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java +++ b/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcid/OrcidClientTest.java @@ -1,13 +1,11 @@ package eu.dnetlib.doiboost.orcid; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -17,7 +15,6 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.Lists; -import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -28,8 +25,11 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.schema.orcid.AuthorData; -import eu.dnetlib.doiboost.orcid.xml.XMLRecordParserTest; +import eu.dnetlib.doiboost.orcid.util.DownloadsReport; +import eu.dnetlib.doiboost.orcid.util.MultiAttemptsHttpConnector; import jdk.nashorn.internal.ir.annotations.Ignore; public class OrcidClientTest { @@ -49,7 +49,7 @@ public class OrcidClientTest { @BeforeAll private static void setUp() throws IOException { - testPath = Files.createTempDirectory(XMLRecordParserTest.class.getName()); + testPath = Files.createTempDirectory(OrcidClientTest.class.getName()); System.out.println("using test path: " + testPath); } @@ -349,4 +349,84 @@ public class OrcidClientTest { final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); logToFile(testPath, "\n\nwork updated \n\n" + work); } + + @Test + void downloadUnknownHostExceptionTest() throws Exception { + logToFile(testPath, 
"downloadUnknownHostExceptionTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String wrongApiUrl = "https://api.orcid_UNKNOWN.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(wrongApiUrl, report); + } catch (CollectorException ce) { + logToFile(testPath, "CollectorException downloading: " + ce.getMessage()); + } catch (Throwable t) { + logToFile(testPath, "Throwable downloading: " + t.getMessage()); + } + } + + @Test + void downloadAttemptSuccessTest() throws Exception { + logToFile(testPath, "downloadAttemptSuccessTest"); + final String orcid = "0000-0001-7291-3210"; + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/" + orcid + "/" + REQUEST_TYPE_RECORD; + String url = "UNKNOWN"; + DownloadsReport report = new DownloadsReport(); + String record = httpConnector.getInputSource(apiUrl, report); + logToFile(testPath, "Downloaded at first attempt record: " + record); + } + + @Test + void downloadAttemptNotFoundTest() throws Exception { + logToFile(testPath, "downloadAttemptNotFoundTest"); + final HttpClientParams clientParams = new HttpClientParams(); + clientParams.setMaxNumberOfRetry(2); + MultiAttemptsHttpConnector httpConnector = new MultiAttemptsHttpConnector(clientParams); + httpConnector.setAuthMethod(MultiAttemptsHttpConnector.BEARER); + httpConnector.setAcceptHeaderValue("application/vnd.orcid+xml"); + httpConnector.setAuthToken("78fdb232-7105-4086-8570-e153f4198e3d"); + String apiUrl = "https://api.orcid.org/v3.0/NOTFOUND/" + REQUEST_TYPE_RECORD; + DownloadsReport report = new DownloadsReport(); + try { + httpConnector.getInputSource(apiUrl, report); + } catch (CollectorException ce) { + + } + report.forEach((k, v) -> { + try { + logToFile(testPath, k + " " + v); + } catch (IOException e) { + e.printStackTrace(); + } + }); + } + + @Test + @Ignore + void testDownloadedAuthor() throws Exception { + final String base64CompressedWork = 
"H4sIAAAAAAAAAI2Yy26jMBSG932KiD0hIe1MiwiVZjGLkWbX2XRHsFOsgs3YJmnefszFFy4+mUhtVPz9P/gcH/vQ9PWrrjYXzAVh9Bjst7tgg2nBEKEfx+DP28/wOdgImVOUV4ziY3DDInjNHlKOC8ZRMnxtmlyWxyDaqU+ofg7h/uX7IYwfn+Ngo25ARUKoxJzm1TEopWySKLper1vGC4LU74+IikgTWoFRW+SyfyyfxCBag4iQhBawyoGMDjdqJrnECJAZRquYLDEPaV5jv8oyWlXj+qTiXZLGr7KMiQbnjAOR6IY1W7C6hgIwjGt6SKGfHsY13ajHYipLIcIyJ5Xw6+akdvjEtyt4wxEwM6+VGph5N2zYr2ENhQRhKsmZYChmS1j7nFs6VIBPOwImKhyfMVeFg6GAWEjrcoQ4FoBmBGwVXYhagGHDBIEX+ZzUDiqyn35VN6rJUpUJ4zc/PAI2T03FbrUKJZQszWjV3zavVOjvVfoE01qB+YUUQPGNwHTt3luxJjdqh1AxJFBKLWOrSeCcF13RtxxYtlPOPqH6m+MLwVfoMQ2kdae2ArLajc6fTxkI1nIoegs0yB426pMO+0fSw07xDKMu0XKSde5C2VvrlVMijRzFwqY7XTJI1QMLWcmEzMxtDdxfHiYSgTNJnYJ1K9y5k0tUrMgrnGGaRiuXxxuClulYUbr0nBvpkYLjvgTCGsuSoex3f1CEvRPHKI184NJKtKeaiO7cD5E61bJ4F+9DFd7d01u8Tw6H5BBvvz8f3q3nXLGIeJULGdaqeVBBRK7rS7h/fNvvk/gpedxt4923dxP7Fc3KtKuc1BhlkrfYmeN4dcmrhmbw60+HmWw2CKgbTuqc32CXKTTmeTWT6bDBjPsQ0DTpnchdaYO0ayQ2FyLIiVREqs25aU8VKYLRbK0BsyZuqvr1MU2Sm/rDdhe/2CRN6FU/b+oBVyj1zqRtC5F8kAumfTclsl+s7EoNQu64nfOaVLeezX60Z3XCULLi6GI2IZGTEeey7fec9lBAuXawIHKcpifE7GABHWfoxLVfpUNPBXoMbZWrHFsR3bPAk9J9i2sw9nW6AQT1mpk++7JhW+v44Hmt8PomJqfD13jRnvFOSxCKtu6qHoyBbQ7cMFo750UEfGaXm6bEeplXIXj2hvL6mA7tzvIwmM9pbJFBG834POZdLGi2gH2u9u0K9HMwn5PTioFWLufzmrS4oNuU9Pkt2rf/2jMs7fMdm2rQTTM+j+49AzToAVuXYA1mD2k0+XdE9vAP+JYR5NcQAAA="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded author \n\n" + work); + } + + @Test + @Ignore + void testDownloadedWork() throws Exception { + final String base64CompressedWork = "H4sIAAAAAAAAANVa63LiOBb+z1Oo+LVbhbkGAlTCLE1Id9IhTQV6unr/CVvB2tiWR5Khmal5rX2BfbE9ki3b3Jzt6Y13h6pQSPrOXTo6knL10zffQxvCBWXBdbVVb1YRCWzm0GB9Xf28vLX6VSQkDhzssYBcV3dEVH8aVa62jL8M1RcKI2kBAYwNLnrtXrMPFCGW7nW10YSPBX8dq3XRb1swNGgomkaG3FBBV9SjcnddDaOVR+0qApUCMaSBJDzA3nXVlTIcNhrb7bbOuE0d+F43AtEwCENBnMjGUhtyjiSFGBqHCkkDu5gqB0rpSMgJsCJOAVmKMVRMuoRbAfbJeaoMY6h84q8gQi4Nz1NlmNQbnDNe4Ak1bLA28/0iB8TjBg1GMV5gdzxu0CGoxSBKlkMkpp44T3eINBxeyG5bKDABpJb7QF1guRpOsd/iOWRRhwSSPlNS5LNjsOHzHAXxmjlHmwBSr3DyTDgsNVLkkAxk6LDjcCIKaBJAtoo2FCagFTJBiyf5IdJwUAv2PJUaNUgXlgnju/PgBJDFKfTYzgdXFgXLYAzVLxH2wPWvrfQ9mKEVhG+oXbD4EsD+3H1txqaxgQwBPqRFIc0w2WoSBHNbLfqIF0zbfVymIbQ52VCyLVIzBRm6VeQVRFWNHuoHDASLeJH3jqDVUQXB5yrOH0ObE5UNLQe+R+1mu2U1u1Z7sGy2hq3esN2tt5oXf79qnELv8fGwkJYPmxSswD1uA6vVXrY7w+5g2G3WuxedjNsJmj2escJx33G/ZXsU5iAs/AyRR0WcjpRXBLglc0lM1BjP59bX1qw9Hn/+dH87/dy9vBikeinKkyzVHjoqJNWIk7QuE3KU6pES6O7MwsarJh44QW1KowcWOCxAC9tlzEPsGX3YrYGQICgS0JKzENach2bEoTYNyKEQzaJyQnzSqesKSaV3IhRx92L8tLAm7GerjbZUujSwlFnIobqKkTuth+Q4ED4Vqqypp5JyfK8ah5Ji0f8AZVSGT2TZVGXfBLw/liOyqdRpJqfyXr8ldyEZrehKkm8Jr/2hc3Qb7EVk9DfMJbU98pu3k+6aETXXBebCZpt23tBaBUfSZRxdo98eYmgNfRxrh3zAnldDM/37FvZ+IiWtoQfddgiaEGBIDGCG7btA7jgBP9svAK2h90l4yYqIGop5jgMHXA4J0NB9ksR+YTX0qFtfqACO01jGjDHFPx552AW2W0P3uvGROk4NLfTvCeNS8X9MaDg1rL9Qz6PYh7En3f4ZNmKS6nUfQYFmE6PYe05IYBqPFGaq5wHlYpaoDbYqxokVK+JBerz51z+BIzc+SfSdTHVrTiSYtZzGFNOdGrr5ohsLF2+NUguqppkDoua6/S6yXwAYu44pM+/HiZ1BwEDWMqYbC5fjZ+MEBwMjb4PRLdTFYWrUwiUhJH/H+G3pMl/7fjqJhTGwSwU5lnfLsVDmxIPvmRetbJeCOsvfaxWXbXWxLVziqNky51BLW1OP2JKzgNoASSa7Gk1WAfrLI9mirzBBIUD1r/W/AgrMla7CjEMOzYBJolo30/mnxd0SzadPt5+eZtMb9O7rEN1wNINgEA8Ha+IxNMdrHLCQRR4TFRCudnmB7m6GqD0YDCqW+lQqlfnndw93iw/TJ/RwN5k+TqZDNJkAQyUvUlWvktjrdgbQEeI1EapN8Grd7MOeYJlfajSxWVOMfcIhVQXgfcFsqhcceobVA/U3GjsbDCYrjVSKSz0wHo8Xym6dArRvvjsbAfUGouFr8s5lG9o72DVVSy1saDqMqlarWW+12r2GiIXXMzuAU6AQcLLqWf3mZRf6iOlsNQdda9BudhQnvNNdPWN8XA7BgU5G2k3pLADA75XD3BSnn3y+3M90SbZWGczkxiRVmfSaJrd0V8u0yG3CeYRyht7O07Ste45weuqNmhcpLO44woEPRq1eilLN/f3ntEqGPFfzi2PmudHTO3EOEKf60LdTyUeDr7KIIzKfTfqtdr896J
xklQtbES/IQD7UyL+SZIJSXYhLHkHZ9oqEjPR1MRzWu550cDYdCeI9n+S4hzouUU76+UeCQJ0fjkKn0+v3m703i0Eh/z97BCDH/XAAziTIt4rH94j7s4dHbSY/HJ90e3qriBQL+MMxCGETs9j/QxiSQ5PaS63/QsZqdS8vOxdvtj7Oc//fL4dTI2LvDAfVA6erSDKe3+cPxw70j4c5HHZlfLT9iAEZYKjZkxOYKZxymJy659l/t+QZllC5bvVJrzShD5GN0/NkiaZyqNcJh0NrdngtTfp7wviaHB+SS1Ng7O+Sk3h5HodT4S8RyY78pUmGM6eEg1l8tVCa1KnvY/SgrzDKsxRLF46j+uahNKH3BE6lsIb1lUxpUhdS3WUE+u6nPP/qiyAsklumMhMz9SBNqeus0oQ+QXqwIa7m3qy87IhXnBLPI8kVXXlZMaASm5vAEqWuKYkvHMtbPdiPiIdm6dVmeVMZjX+lfnKDWmaRAT7ev6ctTfhEF3RoWnJeXlKfSXcHcsf69rk0wTd4Qx30RV9yl5et2Ipwqe/SS5MJXiU8vbIv2b/qZaC8PZ65AUwj9QJR3vx1mQ9b7VPy1FFebnSpWq7xi0qJuwA+fLYpL7rwJdLXobcSa97kM4Cl35f3YXmofp0+8R9gBc/XeXL9Vn38pH7mLTs27z9T8ky1n7ynlZ0I4le78rYzl6t/woG5krwQlpcRcLDD2UPkH5F73C9G5tFKfY0q/wa1TIHI0CgAAA=="; + final String work = ArgumentApplicationParser.decompressValue(base64CompressedWork); + logToFile(testPath, "\n\ndownloaded work \n\n" + work); + } } From 421d55265d7c567092bbf418cdce6b818a9dbd77 Mon Sep 17 00:00:00 2001 From: antleb Date: Tue, 21 Sep 2021 03:07:58 +0300 Subject: [PATCH 153/166] created hive action for observatory queries --- .../{observatory.sh => observatory-post.sh} | 9 - .../graph/stats/oozie_app/observatory-pre.sh | 16 + .../scripts/step21-createObservatoryDB.sql | 449 +++++++++--------- .../dhp/oa/graph/stats/oozie_app/workflow.xml | 30 +- 4 files changed, 258 insertions(+), 246 deletions(-) rename dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/{observatory.sh => observatory-post.sh} (63%) create mode 100644 dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh similarity index 63% rename from dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh rename to dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh index 7db8d40a5..db8d39af2 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-post.sh @@ -9,16 +9,7 @@ fi export SOURCE=$1 export TARGET=$2 export SHADOW=$3 -export SCRIPT_PATH=$4 -echo "Getting file from " $4 -hdfs dfs -copyToLocal $4 - -echo "Creating observatory database" -impala-shell -q "drop database if exists ${TARGET} cascade" -impala-shell -q "create database if not exists ${TARGET}" -impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - -cat step21-createObservatoryDB.sql | sed s/SOURCE/$1/g | sed s/TARGET/$2/g1 | hive -f - impala-shell -q "invalidate metadata;" impala-shell -d ${TARGET} -q "show tables" --delimited | sed "s/\(.*\)/compute stats ${TARGET}.\1;/" | impala-shell -f - echo "Impala shell finished" diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh new file mode 100644 index 000000000..92543b8b8 --- /dev/null +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh @@ 
-0,0 +1,16 @@ +export PYTHON_EGG_CACHE=/home/$(whoami)/.python-eggs +export link_folder=/tmp/impala-shell-python-egg-cache-$(whoami) +if ! [ -L $link_folder ] +then + rm -Rf "$link_folder" + ln -sfn ${PYTHON_EGG_CACHE}${link_folder} ${link_folder} +fi + +export SOURCE=$1 +export TARGET=$2 +export SHADOW=$3 + +echo "Creating observatory database" +impala-shell -q "drop database if exists ${TARGET} cascade" +impala-shell -q "create database if not exists ${TARGET}" +impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index f17b5358f..e0bdcd685 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -1,14 +1,14 @@ -create table TARGET.result_cc_licence stored as parquet as +create table ${observatory_db_name}.result_cc_licence stored as parquet as select r.id, coalesce(rln.count, 0) > 0 as cc_licence -from SOURCE.result r +from ${stats_db_name}.result r left outer join ( select rl.id, sum(case when lower(rln.normalized) like 'cc-%' then 1 else 0 end) as count - from SOURCE.result_licenses rl - left outer join SOURCE.licenses_normalized rln on rl.type=rln.license + from ${stats_db_name}.result_licenses rl + left outer join ${stats_db_name}.licenses_normalized rln on rl.type=rln.license group by rl.id ) rln on rln.id=r.id; -create table TARGET.result_affiliated_country stored as parquet as +create table ${observatory_db_name}.result_affiliated_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -24,20 +24,20 @@ select rfc.count > 1 as multiple_funders, r.type, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, 
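With this change step21-createObservatoryDB.sql no longer depends on sed rewriting SOURCE/TARGET tokens: it becomes a plain parameterized Hive script, and the ${stats_db_name}/${observatory_db_name} variables are supplied by the workflow action that runs it. The same rebuild can be reproduced by hand; a rough sketch, where the database names are placeholders and the exact client options depend on the cluster setup:

#!/usr/bin/env bash
# Manual observatory rebuild, mirroring the pre -> SQL -> post sequence (illustrative).
# STATS_DB / OBS_DB / SHADOW_DB are placeholder names, not values taken from the workflow.
STATS_DB=openaire_stats
OBS_DB=openaire_observatory
SHADOW_DB=openaire_observatory_shadow

bash observatory-pre.sh "$STATS_DB" "$OBS_DB" "$SHADOW_DB"     # drop/recreate the target db and create views over the stats db
hive --hivevar stats_db_name="$STATS_DB" \
     --hivevar observatory_db_name="$OBS_DB" \
     -f step21-createObservatoryDB.sql                         # build the observatory tables from the parameterized script
bash observatory-post.sh "$STATS_DB" "$OBS_DB" "$SHADOW_DB"    # invalidate Impala metadata and compute stats on the new tables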
rpc.count > 1, rfc.count > 1, c.code, c.name; -create table TARGET.result_affiliated_year stored as parquet as +create table ${observatory_db_name}.result_affiliated_year stored as parquet as select count(distinct r.id) as total, r.green, @@ -53,20 +53,20 @@ select rfc.count > 1 as multiple_funders, r.type, r.year -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -create table TARGET.result_affiliated_year_country stored as parquet as +create table ${observatory_db_name}.result_affiliated_year_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -82,20 +82,20 @@ select rfc.count > 1 as multiple_funders, r.type, r.year, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -create table TARGET.result_affiliated_datasource stored 
as parquet as +create table ${observatory_db_name}.result_affiliated_datasource stored as parquet as select count(distinct r.id) as total, r.green, @@ -111,22 +111,22 @@ select rfc.count > 1 as multiple_funders, r.type, d.name as dname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_datasources rd on rd.id=r.id - left outer join SOURCE.datasource d on d.id=rd.datasource - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id + left outer join ${stats_db_name}.datasource d on d.id=rd.datasource + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -create table TARGET.result_affiliated_datasource_country stored as parquet as +create table ${observatory_db_name}.result_affiliated_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -142,22 +142,22 @@ select rfc.count > 1 as multiple_funders, r.type, d.name as dname, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_datasources rd on rd.id=r.id - left outer join SOURCE.datasource d on d.id=rd.datasource - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_datasources rd on rd.id=r.id + left outer join ${stats_db_name}.datasource d on d.id=rd.datasource + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + 
left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -create table TARGET.result_affiliated_organization stored as parquet as +create table ${observatory_db_name}.result_affiliated_organization stored as parquet as select count(distinct r.id) as total, r.green, @@ -173,20 +173,20 @@ select rfc.count > 1 as multiple_funders, r.type, o.name as oname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -create table TARGET.result_affiliated_organization_country stored as parquet as +create table ${observatory_db_name}.result_affiliated_organization_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -202,20 +202,20 @@ select rfc.count > 1 as multiple_funders, r.type, o.name as oname, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join 
${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -create table TARGET.result_affiliated_funder stored as parquet as +create table ${observatory_db_name}.result_affiliated_funder stored as parquet as select count(distinct r.id) as total, r.green, @@ -231,22 +231,22 @@ select rfc.count > 1 as multiple_funders, r.type, p.funder as pfunder -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - join SOURCE.result_projects rp on rp.id=r.id - join SOURCE.project p on p.id=rp.project - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -create table TARGET.result_affiliated_funder_country stored as parquet as +create table ${observatory_db_name}.result_affiliated_funder_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -262,22 +262,22 @@ select rfc.count > 1 as multiple_funders, r.type, p.funder as pfunder, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_organization ro on ro.id=r.id - join SOURCE.organization o on o.id=ro.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - join SOURCE.result_projects rp on rp.id=r.id - join SOURCE.project p on p.id=rp.project - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_organization ro on ro.id=r.id + join ${stats_db_name}.organization o on o.id=ro.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join 
${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; -create table TARGET.result_deposited_country stored as parquet as +create table ${observatory_db_name}.result_deposited_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -293,22 +293,22 @@ select rfc.count > 1 as multiple_funders, r.type, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, c.code, c.name; -create table TARGET.result_deposited_year stored as parquet as +create table ${observatory_db_name}.result_deposited_year stored as parquet as select count(distinct r.id) as total, r.green, @@ -324,22 +324,22 @@ select rfc.count > 1 as multiple_funders, r.type, r.year -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on 
c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year; -create table TARGET.result_deposited_year_country stored as parquet as +create table ${observatory_db_name}.result_deposited_year_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -355,22 +355,22 @@ select rfc.count > 1 as multiple_funders, r.type, r.year, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') 
then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, r.year, c.code, c.name; -create table TARGET.result_deposited_datasource stored as parquet as +create table ${observatory_db_name}.result_deposited_datasource stored as parquet as select count(distinct r.id) as total, r.green, @@ -386,22 +386,22 @@ select rfc.count > 1 as multiple_funders, r.type, d.name as dname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name; -create table TARGET.result_deposited_datasource_country stored as parquet as +create table ${observatory_db_name}.result_deposited_datasource_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -417,22 +417,22 @@ select rfc.count > 1 as multiple_funders, r.type, d.name as dname, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data 
Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, d.name, c.code, c.name; -create table TARGET.result_deposited_organization stored as parquet as +create table ${observatory_db_name}.result_deposited_organization stored as parquet as select count(distinct r.id) as total, r.green, @@ -448,22 +448,22 @@ select rfc.count > 1 as multiple_funders, r.type, o.name as oname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name; -create table TARGET.result_deposited_organization_country stored as parquet as +create table ${observatory_db_name}.result_deposited_organization_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -479,22 +479,22 @@ select rfc.count > 1 as multiple_funders, r.type, o.name as oname, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join 
SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, o.name, c.code, c.name; -create table TARGET.result_deposited_funder stored as parquet as +create table ${observatory_db_name}.result_deposited_funder stored as parquet as select count(distinct r.id) as total, r.green, @@ -510,24 +510,24 @@ select rfc.count > 1 as multiple_funders, r.type, p.funder as pfunder -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - join SOURCE.result_projects rp on rp.id=r.id - join SOURCE.project p on p.id=rp.project - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join 
${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder; -create table TARGET.result_deposited_funder_country stored as parquet as +create table ${observatory_db_name}.result_deposited_funder_country stored as parquet as select count(distinct r.id) as total, r.green, @@ -543,38 +543,19 @@ select rfc.count > 1 as multiple_funders, r.type, p.funder as pfunder, c.code as ccode, c.name as cname -from SOURCE.result r - join SOURCE.result_datasources rd on rd.id=r.id - join SOURCE.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') - join SOURCE.datasource_organizations dor on dor.id=d.id - join SOURCE.organization o on o.id=dor.organization - join SOURCE.country c on c.code=o.country and c.continent_name='Europe' - join SOURCE.result_projects rp on rp.id=r.id - join SOURCE.project p on p.id=rp.project - left outer join SOURCE.result_licenses rl on rl.id=r.id - left outer join SOURCE.result_pids pids on pids.id=r.id - left outer join SOURCE.result_cc_licence rln on rln.id=r.id - left outer join SOURCE.result_projectcount rpc on rpc.id=r.id - left outer join SOURCE.result_fundercount rfc on rfc.id=r.id +from ${stats_db_name}.result r + join ${stats_db_name}.result_datasources rd on rd.id=r.id + join ${stats_db_name}.datasource d on d.id=rd.datasource and d.type in ('Institutional Repository','Data Repository', 'Repository', 'Publication Repository') + join ${stats_db_name}.datasource_organizations dor on dor.id=d.id + join ${stats_db_name}.organization o on o.id=dor.organization + join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' + join ${stats_db_name}.result_projects rp on rp.id=r.id + join ${stats_db_name}.project p on p.id=rp.project + left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id + left outer join ${stats_db_name}.result_pids pids on pids.id=r.id + left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id + left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, case when r.access_mode in ('Open Access', 'Open Source') then true else false end, r.peer_reviewed, r.type, abstract, - cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; - --- compute stats TARGET.result_affiliated_country; --- compute stats TARGET.result_affiliated_year; --- compute stats TARGET.result_affiliated_year_country; --- compute stats TARGET.result_affiliated_datasource; --- compute stats TARGET.result_affiliated_datasource_country; --- compute stats TARGET.result_affiliated_organization; --- compute stats TARGET.result_affiliated_organization_country; --- compute stats TARGET.result_affiliated_funder; --- compute stats TARGET.result_affiliated_funder_country; --- compute stats 
TARGET.result_deposited_country; --- compute stats TARGET.result_deposited_year; --- compute stats TARGET.result_deposited_year_country; --- compute stats TARGET.result_deposited_datasource; --- compute stats TARGET.result_deposited_datasource_country; --- compute stats TARGET.result_deposited_organization; --- compute stats TARGET.result_deposited_organization_country; --- compute stats TARGET.result_deposited_funder; --- compute stats TARGET.result_deposited_funder_country; \ No newline at end of file + cc_licence, r.authors > 1, rpc.count > 1, rfc.count > 1, p.funder, c.code, c.name; \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index 8fe05a933..08d33f4e8 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -326,20 +326,44 @@ ${wf:appPath()}/scripts/step20-createMonitorDB.sql monitor.sh + + + + + + + ${jobTracker} + ${nameNode} + observatory-pre.sh + ${stats_db_name} + ${observatory_db_name} + ${observatory_db_shadow_name} + observatory-pre.sh + + + ${hive_jdbc_url} + + stats_db_name=${stats_db_name} + observatory_db_name=${observatory_db_name} + + + + + + ${jobTracker} ${nameNode} - observatory.sh + observatory-post.sh ${stats_db_name} ${observatory_db_name} ${observatory_db_shadow_name} - ${wf:appPath()}/scripts/step21-createObservatoryDB.sql - observatory.sh + observatory-post.sh From f358cabb2bc63be8d31a069afbe476465a4c1038 Mon Sep 17 00:00:00 2001 From: antleb Date: Wed, 22 Sep 2021 21:50:37 +0300 Subject: [PATCH 154/166] fixed typo --- .../scripts/step21-createObservatoryDB.sql | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql index e0bdcd685..e24370e7d 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step21-createObservatoryDB.sql @@ -30,7 +30,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -59,7 +59,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence 
rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -88,7 +88,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -119,7 +119,7 @@ from ${stats_db_name}.result r left outer join ${stats_db_name}.datasource d on d.id=rd.datasource left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -150,7 +150,7 @@ from ${stats_db_name}.result r left outer join ${stats_db_name}.datasource d on d.id=rd.datasource left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -179,7 +179,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -208,7 +208,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id 
left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -239,7 +239,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -270,7 +270,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -301,7 +301,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -332,7 +332,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -363,7 +363,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on 
rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -394,7 +394,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -425,7 +425,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -456,7 +456,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -487,7 +487,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.country c on c.code=o.country and c.continent_name='Europe' left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, @@ -520,7 +520,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case 
when pids.pid is not null then true else false end, @@ -553,7 +553,7 @@ from ${stats_db_name}.result r join ${stats_db_name}.project p on p.id=rp.project left outer join ${stats_db_name}.result_licenses rl on rl.id=r.id left outer join ${stats_db_name}.result_pids pids on pids.id=r.id - left outer join ${stats_db_name}.result_cc_licence rln on rln.id=r.id + left outer join ${observatory_db_name}.result_cc_licence rln on rln.id=r.id left outer join ${stats_db_name}.result_projectcount rpc on rpc.id=r.id left outer join ${stats_db_name}.result_fundercount rfc on rfc.id=r.id group by r.green, r.gold, case when rl.type is not null then true else false end, case when pids.pid is not null then true else false end, From a1e1cf32d7ba0758785b0fa10eb61004eb18a70b Mon Sep 17 00:00:00 2001 From: antleb Date: Fri, 24 Sep 2021 12:57:24 +0300 Subject: [PATCH 155/166] fixed an impala error --- .../eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh index 92543b8b8..55a308c50 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/observatory-pre.sh @@ -13,4 +13,4 @@ export SHADOW=$3 echo "Creating observatory database" impala-shell -q "drop database if exists ${TARGET} cascade" impala-shell -q "create database if not exists ${TARGET}" -impala-shell -d ${SOURCE} -q "show tables" --delimited | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - \ No newline at end of file +impala-shell -d ${SOURCE} -q "show tables" --delimited | grep -iv roar | sed "s/\(.*\)/create view ${TARGET}.\1 as select * from ${SOURCE}.\1;/" | impala-shell -f - From b924276e18b8b5a438251b89998471c1b800eb0c Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Fri, 24 Sep 2021 17:11:56 +0200 Subject: [PATCH 156/166] tests to generate records for the EOSC-Future demo with the EOSC Jupyter Notebbok subject --- .../provision/IndexRecordTransformerTest.java | 12 ++++ .../eosc-future/data-transfer-pilot.xml | 72 +++++++++++++++++++ .../training-notebooks-seadatanet.xml | 71 ++++++++++++++++++ .../eu/dnetlib/dhp/oa/provision/fields.xml | 16 +++-- 4 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index cd07cfcb1..8daf318be 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -76,6 +76,18 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureDataTransferPilot() throws IOException, TransformerException { + 
final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/data-transfer-pilot.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureTraining() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/training-notebooks-seadatanet.xml")); + testRecordTransformation(record); + } + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml new file mode 100644 index 000000000..23dd6c6ed --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml @@ -0,0 +1,72 @@ + + +
+ r37b0ad08687::dec0d8520e726f2adda9a51280ac7299 + 2021-09-22T08:53:16Z + under curation + +
+ + + + EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml + Giuseppe La Rocca + Enol Fernández + Andrea Manzi + + + + This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement. + + EOSC Jupyter Notebook + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:4218562 + + oai:zenodo.org:4218562 + 10.5281/zenodo.4218562 + + + false + false + 0.9 + + + + + + + + + + + + + https://zenodo.org/record/4218562 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml new file mode 100644 index 000000000..9995b902f --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml @@ -0,0 +1,71 @@ + + +
+ r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb + 2021-09-22T08:53:13Z + under curation + +
+ + + + + EGI-Foundation/training-notebooks-seadatanet: Version 0.4 + Enol Fernández + + + + A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data. + + EOSC Jupyter Notebook + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:3561323 + + oai:zenodo.org:3561323 + 10.5281/zenodo.3561323 + + + false + false + 0.9 + + + + + + + + + + + + + https://zenodo.org/record/3561323 + + + + + + +
+
\ No newline at end of file diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml index c47975c9d..910a366f6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/fields.xml @@ -15,7 +15,13 @@ - + + + + + + + @@ -28,7 +34,8 @@ - + + @@ -79,6 +86,7 @@ + @@ -105,7 +113,7 @@ - + @@ -130,7 +138,7 @@ - + From be79d74e3d7993e8dc160d89ccd0b182dec1b139 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Mon, 27 Sep 2021 16:57:04 +0200 Subject: [PATCH 157/166] Fixed DoiBoost generation to point to correct organization in affiliation relation --- .../doiboost/SparkGenerateDoiBoost.scala | 51 ++++++++++++++++++- .../doiboost/generate_doiboost_params.json | 3 +- .../doiboost/process/oozie_app/workflow.xml | 7 +++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala index b77de13b9..e501b4823 100644 --- a/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala +++ b/dhp-workflows/dhp-doiboost/src/main/java/eu/dnetlib/doiboost/SparkGenerateDoiBoost.scala @@ -13,10 +13,30 @@ import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ +import org.json4s.DefaultFormats +import org.json4s.JsonAST.{JField, JObject, JString,JArray} +import org.json4s.jackson.JsonMethods.parse object SparkGenerateDoiBoost { + def extractIdGRID(input:String):List[(String,String)] = { + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + lazy val json: org.json4s.JValue = parse(input) + + val id:String = (json \ "id").extract[String] + + val grids:List[String] = for { + + JObject(pid) <- json \ "pid" + JField("qualifier", JObject(qualifier)) <- pid + JField("classid", JString(classid)) <-qualifier + JField("value", JString(vl)) <- pid + if classid == "GRID" + } yield vl + grids.map(g => (id, s"unresolved::grid::${g.toLowerCase}"))(collection.breakOut) + } + def main(args: Array[String]): Unit = { @@ -36,6 +56,7 @@ object SparkGenerateDoiBoost { val hostedByMapPath = parser.get("hostedByMapPath") val workingDirPath = parser.get("workingPath") + val openaireOrganizationPath = parser.get("openaireOrganizationPath") val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { override def zero: Publication = new Publication @@ -156,7 +177,7 @@ object SparkGenerateDoiBoost { magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).flatMap(item => { val pub:Publication = item._1._2 val affiliation = item._2 - val affId:String = if (affiliation.GridId.isDefined) DoiBoostMappingUtil.generateGridAffiliationId(affiliation.GridId.get) else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) + val affId:String = if (affiliation.GridId.isDefined) s"unresolved::grid::${affiliation.GridId.get.toLowerCase}" else DoiBoostMappingUtil.generateMAGAffiliationId(affiliation.AffiliationId.toString) val r:Relation = new Relation r.setSource(pub.getId) r.setTarget(affId) @@ -174,9 +195,35 @@ object SparkGenerateDoiBoost { r1.setDataInfo(pub.getDataInfo) 
r1.setCollectedfrom(List(DoiBoostMappingUtil.createMAGCollectedFrom()).asJava) List(r, r1) - })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation") + })(mapEncoderRel).write.mode(SaveMode.Overwrite).save(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved") + + + val unresolvedRels:Dataset[(String, Relation)] = spark.read.load(s"$workingDirPath/doiBoostPublicationAffiliation_unresolved").as[Relation].map(r => { + + if (r.getSource.startsWith("unresolved")) + (r.getSource, r) + else if (r.getTarget.startsWith("unresolved")) + (r.getTarget,r) + else + ("resolved", r) + }) + + val openaireOrganization:Dataset[(String,String)] = spark.read.text(openaireOrganizationPath).as[String].flatMap(s => extractIdGRID(s)).groupByKey(_._2).reduceGroups((x,y) => if (x != null) x else y ).map(_._2) + + unresolvedRels.joinWith(openaireOrganization,unresolvedRels("_1").equalTo(openaireOrganization("_2"))) + .map { x => + val currentRels = x._1._2 + val currentOrgs = x._2 + if (currentOrgs!= null) + if(currentRels.getSource.startsWith("unresolved")) + currentRels.setSource(currentOrgs._1) + else + currentRels.setTarget(currentOrgs._1) + currentRels + }.write.save(s"$workingDirPath/doiBoostPublicationAffiliation") + magPubs.joinWith(a,magPubs("_1").equalTo(a("PaperId"))).map( item => { val affiliation = item._2 if (affiliation.GridId.isEmpty) { diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json index 1ff63dd0e..39455fb67 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/generate_doiboost_params.json @@ -1,7 +1,8 @@ [ {"paramName": "m", "paramLongName":"master", "paramDescription": "the master name", "paramRequired": true}, {"paramName": "hb", "paramLongName":"hostedByMapPath", "paramDescription": "the hosted By Map Path", "paramRequired": true}, + {"paramName": "oo", "paramLongName":"openaireOrganizationPath", "paramDescription": "the openaire Organization Path", "paramRequired": true}, {"paramName": "ap", "paramLongName":"affiliationPath", "paramDescription": "the Affliation Path", "paramRequired": true}, {"paramName": "pa", "paramLongName":"paperAffiliationPath", "paramDescription": "the paperAffiliation Path", "paramRequired": true}, - {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} + {"paramName": "w", "paramLongName":"workingPath", "paramDescription": "the Working Path", "paramRequired": true} ] diff --git a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml index f5596b60e..eb82c3a7d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-doiboost/src/main/resources/eu/dnetlib/dhp/doiboost/process/oozie_app/workflow.xml @@ -27,6 +27,12 @@ hostedByMapPath the hostedByMap Path
+ + openaireOrganizationPath + the OpenAire Organizations Path + + + outputPath the Path of the sequence file action set @@ -214,6 +220,7 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --hostedByMapPath${hostedByMapPath} + --openaireOrganizationPath${openaireOrganizationPath} --affiliationPath${inputPathMAG}/dataset/Affiliations --paperAffiliationPath${inputPathMAG}/dataset/PaperAuthorAffiliations --workingPath${workingPath} From 66702b1973269c77b9780839d80375ad3e9843d9 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 28 Sep 2021 08:58:59 +0200 Subject: [PATCH 158/166] Added node to update datacite --- .../SparkDownloadUpdateDatacite.scala | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala new file mode 100644 index 000000000..0b459fcf6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/datacite/SparkDownloadUpdateDatacite.scala @@ -0,0 +1,53 @@ +package eu.dnetlib.dhp.actionmanager.datacite + + +import eu.dnetlib.dhp.application.ArgumentApplicationParser +import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.LocalFileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.apache.spark.sql.functions.max +import org.slf4j.{Logger, LoggerFactory} + +import java.text.SimpleDateFormat +import java.util.{Date, Locale} +import scala.io.Source + +object SparkDownloadUpdateDatacite { + val log: Logger = LoggerFactory.getLogger(getClass) + + def main(args: Array[String]): Unit = { + + val conf = new SparkConf + val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString) + parser.parseArgument(args) + val master = parser.get("master") + val sourcePath = parser.get("sourcePath") + val workingPath = parser.get("workingPath") + + val hdfsuri = parser.get("namenode") + log.info(s"namenode is $hdfsuri") + + + val spark: SparkSession = SparkSession.builder().config(conf) + .appName(getClass.getSimpleName) + .master(master) + .getOrCreate() + + implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf] + implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result] + + import spark.implicits._ + + + val maxDate:String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0) + val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US) + val string_to_date =ISO8601FORMAT.parse(maxDate) + val ts = string_to_date.getTime + + + } + +} From 60a6a9a58381a41d16fc30bf635d2bb4eb8868fc Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 30 Sep 2021 09:27:26 +0200 Subject: [PATCH 159/166] [graph2hive] added field 'measures' to the result view --- .../graph/hive/oozie_app/lib/scripts/postprocessing.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index ea483a4a7..46e0eb5e1 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures from ${hiveDbName}.otherresearchproduct o; From 13687fd887616f2208dc45b9decbb23632e0b886 Mon Sep 17 00:00:00 2001 From: dimitrispie Date: Fri, 1 Oct 2021 16:02:02 +0300 Subject: [PATCH 160/166] Sprint 3 indicators update --- .../scripts/step16-createIndicatorsTables.sql | 48 ++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git 
a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 020787039..0ea4a5adc 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -233,4 +233,50 @@ on p.id= tmp.id; create table indi_pub_has_abstract stored as parquet as select distinct publication.id, coalesce(abstract, 1) has_abstract -from publication; \ No newline at end of file +from publication; + +create table indi_with_orcid stored as parquet as +select distinct r.id, coalesce(has_orcid, 0) as has_orcid +from result r +left outer join (select id, 1 as has_orcid from result_orcid) tmp +on r.id= tmp.id + +create table indi_funded_result_with_fundref stored as parquet as +select distinct r.id, coalesce(fundref, 0) as fundref +from project_results r +left outer join (select distinct id, 1 as fundref from project_results +where provenance='Harvested') tmp +on r.id= tmp.id + +create table indi_result_org_country_collab stored as parquet as +with tmp as +(select o.id as id, o.country , ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country + +create table indi_result_org_collab stored as parquet as +with tmp as +(select o.id, ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id) +select o1.id org1,o2.id org2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id +group by o1.id, o2.id, o1.type + +create table indi_result_org_country_collab stored as parquet as +with tmp as +(select o.id as id, o.country , ro.id as result,r.type from organization o +join result_organization ro on o.id=ro.organization +join result r on r.id=ro.id where o.country <> 'UNKNOWN') +select o1.id org1,o2.country country2, o1.type, count(distinct o1.result) as collaborations +from tmp as o1 +join tmp as o2 on o1.result=o2.result +where o1.id<>o2.id and o1.country<>o2.country +group by o1.id, o1.type,o2.country From 991b06bd0b03c867bdd429b9b70adff8b1b6ab89 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 5 Oct 2021 10:21:33 +0200 Subject: [PATCH 161/166] removed generation of EBI links from old dump, now EBI link dump is created by another wf --- .../eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala | 5 ----- .../eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala index f14e5f264..1924d919e 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala +++ 
b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkEBILinksToOaf.scala @@ -32,14 +32,9 @@ object SparkEBILinksToOaf { import spark.implicits._ implicit val PMEncoder: Encoder[Oaf] = Encoders.kryo(classOf[Oaf]) - val ebi_rdd:Dataset[EBILinkItem] = spark.createDataset(spark.sparkContext.textFile(sourcePath).map(s => BioDBToOAF.extractEBILinksFromDump(s))).as[EBILinkItem] - - ebi_rdd.write.mode(SaveMode.Overwrite).save(s"${sourcePath}_dataset") - val ebLinks:Dataset[EBILinkItem] = spark.read.load(s"${sourcePath}_dataset").as[EBILinkItem].filter(l => l.links!= null) ebLinks.flatMap(j =>BioDBToOAF.parse_ebi_links(j.links)) - .repartition(4000) .filter(p => BioDBToOAF.EBITargetLinksFilter(p)) .flatMap(p => BioDBToOAF.convertEBILinksToOaf(p)) .write.mode(SaveMode.Overwrite).save(targetPath) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml index 3f442c5c6..1fa099ebc 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml @@ -74,7 +74,7 @@ yarn-cluster cluster - Create Baselnie DataSet + Create Baseline DataSet eu.dnetlib.dhp.sx.ebi.SparkAddLinkUpdates dhp-graph-mapper-${projectVersion}.jar From b84e0cabeba15fe5ba3de269aab8b47ac953ce10 Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Tue, 5 Oct 2021 16:34:47 +0200 Subject: [PATCH 162/166] Implemented new method for update baseline --- .../sx/graph/ebi/SparkDownloadEBILinks.scala | 100 +++++++++++++----- .../sx/graph/bio/pubmed/BioScholixTest.scala | 8 ++ 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala index 08e060459..eda825bd0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -4,6 +4,8 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.http.client.config.RequestConfig import org.apache.http.client.methods.{HttpGet, HttpUriRequest} import org.apache.http.impl.client.HttpClientBuilder @@ -25,38 +27,78 @@ object SparkDownloadEBILinks { } - def requestLinks(PMID:Long):String = { - val r = new HttpGet(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") - val timeout = 60; // seconds - val config = RequestConfig.custom() - .setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000) - .setSocketTimeout(timeout * 1000).build() - val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() - try { - var tries = 4 - while (tries > 0) { - println(s"requesting ${r.getURI}") - try { - val response = client.execute(r) - println(s"get response with status${response.getStatusLine.getStatusCode}") - if (response.getStatusLine.getStatusCode > 400) { - tries 
-= 1 - } - else - return IOUtils.toString(response.getEntity.getContent) - } catch { - case e: Throwable => - println(s"Error on requesting ${r.getURI}") - e.printStackTrace() - tries -= 1 + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => + println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 } - "" - } finally { - if (client != null) - client.close() } + "" + } finally { + if (client != null) + client.close() + } + } + + + def requestBaseLineUpdatePage():List[String] = { + val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + + val result =data.lines.filter(l => l.startsWith("") + val start = l.indexOf("= 0 && end >start) + l.substring(start+9, (end-start)) + else + "" + }.filter(s =>s.endsWith(".gz") ).map(s => s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s").toList + + result + } + + def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + + + val conf = new Configuration + conf.set("fs.defaultFS", hdfsServerUri) + val fs = FileSystem.get(conf) + val p = new Path((baselinePath)) + val files = fs.listFiles(p,false) + + while (files.hasNext) { + val c = files.next() + c.getPath + + } + + + } + + + def requestLinks(PMID:Long):String = { + requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") } def main(args: Array[String]): Unit = { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index 8e063db7c..b6058f71b 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -7,6 +7,7 @@ import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result} import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.ScholixResolved import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF import eu.dnetlib.dhp.sx.graph.bio.pubmed.PubMedToOaf.dataInfo +import eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks import org.json4s.DefaultFormats import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.jackson.JsonMethods.parse @@ -50,6 +51,13 @@ class BioScholixTest extends AbstractVocabularyTest{ } + @Test + def testDownloadEBIUpdate() = { + val data = SparkDownloadEBILinks.requestBaseLineUpdatePage() + println(data) + } + + @Test def testEBIData() = { val inputXML = Source.fromInputStream(getClass.getResourceAsStream("pubmed.xml")).mkString From 2557bb41f54cbe8232372bfa1c554ce5d3177cbd Mon Sep 17 00:00:00 2001 From: Sandro La Bruzzo Date: Wed, 6 Oct 2021 16:41:08 +0200 Subject: [PATCH 163/166] Implemented new method for update baseline inside scala node --- .../ebi/SparkCreateBaselineDataFrame.scala | 123 +- 
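
The update-file discovery added above amounts to scraping the HTML listing at https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/ for *.gz entries. A self-contained sketch of that extraction, assuming one <a href="..."> anchor per line (the file names below are made up), might look like this:

object UpdateListingSketch {

  // Pull the *.gz file names out of an HTML directory listing, taking each href up to its closing quote.
  def extractGzNames(listingHtml: String): List[String] = {
    val marker = "<a href=\""
    listingHtml.split("\n").toList.flatMap { line =>
      val start = line.indexOf(marker)
      if (start < 0) Nil
      else {
        val from = start + marker.length
        val end = line.indexOf("\"", from)
        if (end > from) List(line.substring(from, end)) else Nil
      }
    }.filter(_.endsWith(".gz"))
  }

  def main(args: Array[String]): Unit = {
    val sample =
      "<a href=\"pubmed21n1100.xml.gz\">pubmed21n1100.xml.gz</a>\n" +
      "<a href=\"pubmed21n1100.xml.gz.md5\">pubmed21n1100.xml.gz.md5</a>"
    println(extractGzNames(sample)) // List(pubmed21n1100.xml.gz)
  }
}

The later revision further below additionally filters this list against the newest file already present on HDFS, so only new update files are fetched.
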
.../sx/graph/ebi/SparkDownloadEBILinks.scala | 45 +- .../sx/graph/ebi/baseline_to_oaf_params.json | 9 +- .../dhp/sx/graph/ebi/ebi_download_update.json | 6 +- .../dhp/sx/graph/ebi/oozie_app/workflow.xml | 2 +- .../graph/ebi/update/oozie_app/workflow.xml | 116 +- .../sx/graph/bio/pubmed/BioScholixTest.scala | 7 +- .../dnetlib/dhp/sx/graph/bio/pubmed/ls_result | 1433 +++++++++++++++++ 8 files changed, 1625 insertions(+), 116 deletions(-) create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala index 26efd723f..ee76cf8ad 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkCreateBaselineDataFrame.scala @@ -6,18 +6,130 @@ import eu.dnetlib.dhp.schema.oaf.Result import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal, PMParser, PubMedToOaf} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path} +import org.apache.http.client.config.RequestConfig +import org.apache.http.client.methods.HttpGet +import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} +import java.io.InputStream import scala.io.Source import scala.xml.pull.XMLEventReader object SparkCreateBaselineDataFrame { + def requestBaseLineUpdatePage(maxFile:String):List[(String,String)] = { + val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") + + val result =data.lines.filter(l => l.startsWith("") + val start = l.indexOf("= 0 && end >start) + l.substring(start+9, (end-start)) + else + "" + }.filter(s =>s.endsWith(".gz") ).filter(s => s > maxFile).map(s => (s,s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s")).toList + + result + } + + + def downloadBaselinePart(url:String):InputStream = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + response.getEntity.getContent + + } + + def requestPage(url:String):String = { + val r = new HttpGet(url) + val timeout = 60; // seconds + val config = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000).build() + val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build() + try { + var tries = 4 + while (tries > 0) { + println(s"requesting ${r.getURI}") + try { + val response = client.execute(r) + println(s"get response with status${response.getStatusLine.getStatusCode}") + if (response.getStatusLine.getStatusCode > 400) { + tries -= 1 + } + else + return IOUtils.toString(response.getEntity.getContent) + } catch { + case e: Throwable => 
+ println(s"Error on requesting ${r.getURI}") + e.printStackTrace() + tries -= 1 + } + } + "" + } finally { + if (client != null) + client.close() + } + } + + + + + + + def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { + + + val conf = new Configuration + conf.set("fs.defaultFS", hdfsServerUri) + val fs = FileSystem.get(conf) + val p = new Path(baselinePath) + val files = fs.listFiles(p,false) + var max_file = "" + while (files.hasNext) { + val c = files.next() + val data = c.getPath.toString + val fileName = data.substring(data.lastIndexOf("/")+1) + + if (fileName> max_file) + max_file = fileName + } + + val files_to_download = requestBaseLineUpdatePage(max_file) + + files_to_download.foreach { u => + val hdfsWritePath: Path = new Path(s"$baselinePath/${u._1}") + val fsDataOutputStream: FSDataOutputStream = fs.create(hdfsWritePath, true) + val i = downloadBaselinePart(u._2) + val buffer = Array.fill[Byte](1024)(0) + while(i.read(buffer)>0) { + fsDataOutputStream.write(buffer) + } + i.close() + println(s"Downloaded ${u._2} into $baselinePath/${u._1}") + fsDataOutputStream.close() + } + + } + + val pmArticleAggregator: Aggregator[(String, PMArticle), PMArticle, PMArticle] = new Aggregator[(String, PMArticle), PMArticle, PMArticle] with Serializable { override def zero: PMArticle = new PMArticle @@ -51,6 +163,10 @@ object SparkCreateBaselineDataFrame { val targetPath = parser.get("targetPath") log.info("targetPath: {}", targetPath) + val hdfsServerUri = parser.get("hdfsServerUri") + log.info("hdfsServerUri: {}", targetPath) + + val isLookupService = ISLookupClientFactory.getLookUpService(isLookupUrl) val vocabularies = VocabularyGroup.loadVocsFromIS(isLookupService) val spark: SparkSession = @@ -61,16 +177,15 @@ object SparkCreateBaselineDataFrame { .master(parser.get("master")).getOrCreate() import spark.implicits._ - val sc = spark.sparkContext - - implicit val PMEncoder: Encoder[PMArticle] = Encoders.kryo(classOf[PMArticle]) implicit val PMJEncoder: Encoder[PMJournal] = Encoders.kryo(classOf[PMJournal]) implicit val PMAEncoder: Encoder[PMAuthor] = Encoders.kryo(classOf[PMAuthor]) implicit val resultEncoder: Encoder[Result] = Encoders.kryo(classOf[Result]) + downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) + val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline",2000) val ds:Dataset[PMArticle] = spark.createDataset(k.filter(i => i._1.endsWith(".gz")).flatMap(i =>{ val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) @@ -87,7 +202,5 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)).as[Result] .filter(p => p!= null) .write.mode(SaveMode.Overwrite).save(targetPath) - - //s"$workingPath/oaf/baseline_oaf" } } diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala index eda825bd0..e940fdff0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/sx/graph/ebi/SparkDownloadEBILinks.scala @@ -4,20 +4,16 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.sx.graph.bio.BioDBToOAF.EBILinkItem import eu.dnetlib.dhp.sx.graph.bio.pubmed.{PMArticle, PMAuthor, PMJournal} import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration -import 
org.apache.hadoop.fs.{FileSystem, Path} import org.apache.http.client.config.RequestConfig -import org.apache.http.client.methods.{HttpGet, HttpUriRequest} +import org.apache.http.client.methods.HttpGet import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf -import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions.max -import org.apache.spark.sql.{Dataset, Encoder, Encoders, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} object SparkDownloadEBILinks { - def createEBILinks(pmid:Long):EBILinkItem = { val res = requestLinks(pmid) @@ -26,7 +22,6 @@ object SparkDownloadEBILinks { null } - def requestPage(url:String):String = { val r = new HttpGet(url) val timeout = 60; // seconds @@ -61,42 +56,6 @@ object SparkDownloadEBILinks { } } - - def requestBaseLineUpdatePage():List[String] = { - val data =requestPage("https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/") - - val result =data.lines.filter(l => l.startsWith("") - val start = l.indexOf("= 0 && end >start) - l.substring(start+9, (end-start)) - else - "" - }.filter(s =>s.endsWith(".gz") ).map(s => s"https://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/$s").toList - - result - } - - def downloadBaseLineUpdate(baselinePath:String, hdfsServerUri:String ):Unit = { - - - val conf = new Configuration - conf.set("fs.defaultFS", hdfsServerUri) - val fs = FileSystem.get(conf) - val p = new Path((baselinePath)) - val files = fs.listFiles(p,false) - - while (files.hasNext) { - val c = files.next() - c.getPath - - } - - - } - - def requestLinks(PMID:Long):String = { requestPage(s"https://www.ebi.ac.uk/europepmc/webservices/rest/MED/$PMID/datalinks?format=json") diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json index 38eb50094..4bee770bd 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/baseline_to_oaf_params.json @@ -1,6 +1,7 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"i", "paramLongName":"isLookupUrl","paramDescription": "isLookupUrl", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath","paramDescription": "the oaf path ", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json 
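
The downloadBaseLineUpdate step above streams each HTTP response body into a file created with fs.create. As a hedged aside, a stream copy that writes only the byte count actually returned by each read (rather than the full buffer on every pass) can be sketched as follows; the object name, the local file:/// configuration and the target path are placeholders for the example, and only hadoop-common is assumed on the classpath.

import java.io.{ByteArrayInputStream, InputStream}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataOutputStream, FileSystem, Path}

object HdfsStreamCopySketch {

  // Copy an input stream into a (possibly HDFS) file, honouring the number of bytes each read returns.
  def copyToFs(in: InputStream, fs: FileSystem, dest: Path): Unit = {
    val out: FSDataOutputStream = fs.create(dest, true)
    try {
      val buffer = new Array[Byte](8192)
      var read = in.read(buffer)
      while (read > 0) {
        out.write(buffer, 0, read) // write only what was read, not the whole buffer
        read = in.read(buffer)
      }
    } finally {
      out.close()
      in.close()
    }
  }

  def main(args: Array[String]): Unit = {
    val conf = new Configuration
    conf.set("fs.defaultFS", "file:///") // local FS for the sketch; a cluster run would pass its name node URI
    val fs = FileSystem.get(conf)
    copyToFs(new ByteArrayInputStream("sample payload".getBytes), fs, new Path("/tmp/baseline-copy-sketch.txt"))
  }
}
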
index 0ae19234a..0860ed558 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/ebi_download_update.json @@ -1,5 +1,5 @@ [ - {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, - {"paramName":"s", "paramLongName":"sourcePath","paramDescription": "the source Path", "paramRequired": true}, - {"paramName":"w", "paramLongName":"workingPath","paramDescription": "the working path ", "paramRequired": true} + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml index 1fa099ebc..7612321c0 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/oozie_app/workflow.xml @@ -25,7 +25,6 @@ Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - yarn-cluster @@ -43,6 +42,7 @@ --workingPath${workingPath} --masteryarn + --hdfsServerUri${nameNode} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml index 1b738caed..cd3bb8c71 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/graph/ebi/update/oozie_app/workflow.xml @@ -1,59 +1,67 @@ - - - - sourcePath - the Working Path - - - workingPath - the Working Path - - - sparkDriverMemory - memory for driver process - - - sparkExecutorMemory - memory for individual executor - - - sparkExecutorCores - number of cores used by single executor - - + + + + sourcePath + the Working Path + + + workingPath + the Working Path + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + - + - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + - - - yarn-cluster - cluster - Incremental Download EBI Links - eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks - dhp-graph-mapper-${projectVersion}.jar - - --executor-memory=${sparkExecutorMemory} - --executor-cores=${sparkExecutorCores} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.shuffle.partitions=2000 - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath} - --workingPath${workingPath} - --masteryarn - - - - - - - \ No newline at end of file + + + yarn-cluster + 
cluster + Incremental Download EBI Links + eu.dnetlib.dhp.sx.graph.ebi.SparkDownloadEBILinks + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.shuffle.partitions=2000 + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath} + --workingPath${workingPath} + --masteryarn + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala index b6058f71b..87279eb21 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/sx/graph/bio/pubmed/BioScholixTest.scala @@ -51,12 +51,7 @@ class BioScholixTest extends AbstractVocabularyTest{ } - @Test - def testDownloadEBIUpdate() = { - val data = SparkDownloadEBILinks.requestBaseLineUpdatePage() - println(data) - } - + @Test def testEBIData() = { diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result new file mode 100644 index 000000000..98a0841c4 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/sx/graph/bio/pubmed/ls_result @@ -0,0 +1,1433 @@ +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0025.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0044.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0075.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0094.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0125.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0144.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0175.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0194.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0225.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0244.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0275.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0294.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0325.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0344.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0375.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0394.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0425.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0433.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0434.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0435.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0436.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0437.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0438.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0439.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0440.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0441.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0442.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0443.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0444.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0445.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0446.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0447.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0448.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0449.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0450.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0451.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0452.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0453.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0454.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0455.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0456.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0457.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0458.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0459.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0460.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0461.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0462.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0463.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0464.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0465.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0466.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0467.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0468.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0469.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0470.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0471.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0472.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0473.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0474.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0475.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0476.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0477.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0478.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0479.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0480.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0481.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0482.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0483.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0484.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0485.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0486.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0487.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0488.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0489.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0490.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0491.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0492.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0493.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0494.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0495.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0496.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0497.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0498.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0499.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0500.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0501.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0502.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0503.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0504.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0505.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0506.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0507.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0508.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0509.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0510.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0511.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0512.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0513.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0514.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0515.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0516.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0517.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0518.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0519.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0520.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0521.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0522.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0523.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0524.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0525.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0526.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0527.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0528.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0529.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0530.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0531.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0532.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0533.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0534.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0535.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0536.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0537.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0538.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0539.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0540.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0541.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0542.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0543.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0544.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0545.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0546.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0547.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0548.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0549.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0550.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0551.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0552.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0553.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0554.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0555.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0556.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0557.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0558.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0559.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0560.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0561.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0562.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0563.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0564.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0565.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0566.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0567.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0568.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0569.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0570.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0571.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0572.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0573.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0574.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0575.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0576.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0577.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0578.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0579.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0580.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0581.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0582.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0583.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0584.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0585.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0586.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0587.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0588.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0589.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0590.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0591.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0592.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0593.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0594.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0595.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0596.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0597.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0598.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0599.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0600.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0601.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0602.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0603.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0604.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0605.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0606.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0607.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0608.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0609.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0610.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0611.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0612.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0613.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0614.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0615.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0616.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0617.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0618.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0619.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0620.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0621.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0622.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0623.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0624.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0625.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0626.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0627.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0628.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0629.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0630.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0631.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0632.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0633.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0634.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0635.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0636.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0637.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0638.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0639.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0640.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0641.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0642.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0643.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0644.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0645.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0646.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0647.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0648.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0649.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0650.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0651.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0652.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0653.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0654.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0655.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0656.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0657.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0658.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0659.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0660.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0661.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0662.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0663.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0664.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0665.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0666.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0667.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0668.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0669.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0670.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0671.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0672.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0673.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0674.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0675.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0676.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0677.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0678.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0679.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0680.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0681.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0682.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0683.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0684.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0685.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0686.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0687.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0688.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0689.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0690.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0691.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0692.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0693.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0694.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0695.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0696.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0697.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0698.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0699.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0700.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0701.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0702.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0703.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0704.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0705.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0706.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0707.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0708.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0709.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0710.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0711.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0712.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0713.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0714.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0715.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0716.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0717.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0718.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0719.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0720.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0721.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0722.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0723.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0724.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0725.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0726.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0727.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0728.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0729.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0730.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0731.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0732.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0733.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0734.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0735.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0736.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0737.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0738.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0739.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0740.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0741.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0742.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0743.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0744.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0745.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0746.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0747.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0748.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0749.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0750.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0751.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0752.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0753.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0754.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0755.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0756.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0757.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0758.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0759.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0760.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0761.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0762.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0763.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0764.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0765.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0766.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0767.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0768.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0769.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0770.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0771.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0772.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0773.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0774.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0775.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0776.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0777.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0778.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0779.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0780.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0781.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0782.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0783.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0784.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0785.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0786.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0787.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0788.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0789.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0790.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0791.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0792.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0793.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0794.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0795.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0796.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0797.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0798.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0799.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0800.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0801.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0802.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0803.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0804.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0805.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0806.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0807.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0808.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0809.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0810.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0811.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0812.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0813.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0814.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0815.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0816.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0817.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0818.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0819.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0820.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0821.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0822.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0823.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0824.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0825.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0826.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0827.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0828.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0829.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0830.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0831.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0832.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0833.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0834.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0835.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0836.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0837.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0838.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0839.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0840.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0841.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0842.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0843.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0844.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0845.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0846.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0847.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0848.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0849.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0850.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0851.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0852.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0853.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0854.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0855.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0856.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0857.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0858.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0859.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0860.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0861.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0862.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0863.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0864.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0865.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0866.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0867.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0868.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0869.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0870.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0871.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0872.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0873.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0874.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0875.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0876.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0877.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0878.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0879.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0880.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0881.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0882.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0883.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0884.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0885.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0886.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0887.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0888.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0889.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0890.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0891.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0892.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0893.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0894.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0895.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0896.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0897.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0898.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0899.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0900.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0901.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0902.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0903.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0904.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0905.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0906.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0907.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0908.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0909.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0910.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0911.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0912.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0913.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0914.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0915.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0916.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0917.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0918.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0919.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0920.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0921.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0922.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0923.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0924.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0925.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0926.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0927.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0928.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0929.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0930.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0931.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0932.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0933.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0934.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0935.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0936.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0937.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0938.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0939.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0940.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0941.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0942.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0943.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0944.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0945.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0946.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0947.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0948.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0949.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0950.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0951.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0952.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0953.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0954.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0955.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0956.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0957.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0958.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0959.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0960.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0961.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0962.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0963.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0964.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0965.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0966.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0967.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0968.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0969.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0970.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0971.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0972.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0973.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0974.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0975.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0976.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0977.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0978.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0979.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0980.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0981.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0982.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0983.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0984.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0985.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0986.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0987.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0988.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0989.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0990.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0991.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0992.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0993.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0994.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0995.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0996.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0997.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0998.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n0999.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1000.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1001.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1002.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1003.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1004.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1005.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1006.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1007.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1008.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1009.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1010.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1011.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1012.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1013.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1014.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1015.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1016.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1017.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1018.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1019.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1020.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1021.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1022.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1023.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1024.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1025.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1026.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1027.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1028.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1029.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1030.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1031.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1032.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1033.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1034.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1035.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1036.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1037.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1038.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1039.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1040.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1041.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1042.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1043.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1044.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1045.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1046.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1047.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1048.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1049.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1050.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1051.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1052.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1053.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1054.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1055.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1056.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1057.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1058.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1059.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1060.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1061.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1062.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1063.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1064.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1065.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1066.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1067.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1068.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1069.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1070.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1071.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1072.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1073.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1074.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1075.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1076.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1077.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1078.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1079.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1080.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1081.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1082.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1083.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1084.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1085.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1086.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1087.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1088.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1089.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1090.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1091.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1092.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1093.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1094.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1095.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1096.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1097.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1098.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1099.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1100.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1101.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1102.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1103.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1104.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1105.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1106.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1107.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1108.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1109.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1110.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1111.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1112.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1113.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1114.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1115.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1116.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1117.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1118.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1119.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1120.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1121.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1122.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1123.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1124.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1125.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1126.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1127.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1128.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1129.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1130.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1131.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1132.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1133.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1134.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1135.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1136.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1137.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1138.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1139.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1140.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1141.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1142.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1143.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1144.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1145.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1146.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1147.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1148.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1149.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1150.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1151.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1152.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1153.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1154.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1155.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1156.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1157.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1158.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1159.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1160.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1161.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1162.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1163.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1164.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1165.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1166.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1167.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1168.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1169.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1170.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1171.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1172.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1173.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1174.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1175.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1176.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1177.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1178.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1179.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1180.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1181.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1182.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1183.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1184.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1185.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1186.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1187.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1188.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1189.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1190.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1191.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1192.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1193.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1194.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1195.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1196.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1197.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1198.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1199.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1200.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1201.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1202.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1203.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1204.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1205.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1206.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1207.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1208.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1209.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1210.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1211.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1212.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1213.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1214.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1215.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1216.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1217.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1218.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1219.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1220.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1221.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1222.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1223.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1224.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1225.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1226.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1227.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1228.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1229.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1230.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1231.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1232.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1233.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1234.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1235.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1236.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1237.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1238.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1239.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1240.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1241.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1242.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1243.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1244.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1245.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1246.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1247.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1248.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1249.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1250.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1251.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1252.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1253.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1254.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1255.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1256.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1257.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1258.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1259.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1260.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1261.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1262.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1263.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1264.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1265.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1266.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1267.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1268.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1269.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1270.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1271.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1272.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1273.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1274.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1275.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1276.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1277.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1278.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1279.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1280.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1281.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1282.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1283.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1284.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1285.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1286.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1287.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1288.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1289.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1290.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1291.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1292.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1293.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1294.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1295.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1296.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1297.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1298.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1299.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1300.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1301.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1302.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1303.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1304.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1305.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1306.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1307.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1308.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1309.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1310.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1311.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1312.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1313.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1314.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1315.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1316.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1317.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1318.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1319.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1320.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1321.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1322.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1323.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1324.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1325.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1326.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1327.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1328.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1329.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1330.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1331.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1332.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1333.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1334.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1335.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1336.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1337.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1338.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1339.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1340.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1341.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1342.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1343.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1344.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1345.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1346.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1347.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1348.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1349.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1350.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1351.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1352.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1353.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1354.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1355.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1356.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1357.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1358.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1359.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1360.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1361.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1362.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1363.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1364.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1365.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1366.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1367.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1368.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1369.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1370.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1371.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1372.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1373.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1374.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1375.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1376.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1377.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1378.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1379.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1380.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1381.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1382.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1383.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1384.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1385.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1386.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1387.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1388.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1389.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1390.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1391.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1392.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1393.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1394.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1395.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1396.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1397.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1398.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1399.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1400.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1401.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1402.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1403.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1404.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1405.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1406.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1407.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1408.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1409.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1410.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1411.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1412.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1413.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1414.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1415.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1416.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1417.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1418.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1419.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1420.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1421.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1422.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1423.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1424.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1425.xml.gz 
+hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1426.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1427.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1428.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1429.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1430.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1431.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1432.xml.gz +hdfs://nameservice1/data/scholix/input/baseline/pubmed21n1433.xml.gz \ No newline at end of file From 9646b9fd98b2e575dbe2bf258ccf708ad4c4be45 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 7 Oct 2021 11:29:11 +0200 Subject: [PATCH 164/166] implementation of the http call for the update of openorgs suggestions --- .../dhp/oa/dedup/UpdateOpenorgsJob.java | 115 ++++++++++++++++++ .../oa/dedup/openorgs/oozie_app/workflow.xml | 20 +-- .../dedup/updateOpenorgsJob_parameters.json | 14 +++ 3 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java new file mode 100644 index 000000000..f23c54e2e --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java @@ -0,0 +1,115 @@ +package eu.dnetlib.dhp.oa.dedup; + +import com.fasterxml.jackson.databind.ObjectMapper; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import org.apache.commons.io.IOUtils; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.TimeUnit; + +public class UpdateOpenorgsJob { + + private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); + + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); + parser.parseArgument(args); + + final String apiUrl = parser.get("apiUrl"); + final int delay = Integer.parseInt(parser.get("delay")); + + log.info("apiUrl: '{}'", apiUrl); + log.info("delay: '{}'", delay); + + APIResponse res = httpCall(apiUrl); + while(res!=null && res.getStatus().equals(ImportStatus.RUNNING)){ + TimeUnit.MINUTES.sleep(delay); + res = httpCall(apiUrl + "/status"); + } + + if (res==null) { + log.error("Openorgs Update FAILED: No response"); + throw new RuntimeException("Openorgs Update FAILED: No response"); + } + + if (res.getStatus()==null || !res.getStatus().equals(ImportStatus.SUCCESS)) { + log.error("Openorgs Update FAILED: '{}' - '{}'", res.getStatus(), res.getMessage()); + throw new RuntimeException(res.getMessage()); + } + + } + + private static APIResponse httpCall(final String url) throws Exception { + final HttpGet req = new HttpGet(url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + try (final CloseableHttpResponse response = 
client.execute(req)) { + final String s = IOUtils.toString(response.getEntity().getContent()); + return (new ObjectMapper()).readValue(s, APIResponse.class); + } + } + } + +} + +class APIResponse { + private String id; + private Long dateStart; + private Long dateEnd; + private ImportStatus status; + private String message; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Long getDateStart() { + return dateStart; + } + + public void setDateStart(Long dateStart) { + this.dateStart = dateStart; + } + + public Long getDateEnd() { + return dateEnd; + } + + public void setDateEnd(Long dateEnd) { + this.dateEnd = dateEnd; + } + + public ImportStatus getStatus() { + return status; + } + + public void setStatus(ImportStatus status) { + this.status = status; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } +} + +enum ImportStatus { + SUCCESS, + FAILED, + RUNNING, + NOT_LAUNCHED, + NOT_YET_STARTED +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml index 30442406c..c7c6c9257 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml @@ -28,6 +28,11 @@ dbPwd password to access the OpenOrgs database + + dbConnections + 10 + number of connections to the postgres db + workingPath path for the working directory @@ -223,7 +228,7 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} @@ -254,19 +259,18 @@ --dbTable${dbTable} --dbUser${dbUser} --dbPwd${dbPwd} - --numConnections20 + --numConnections${dbConnections} - - ${jobTracker} - ${nameNode} - /usr/bin/curl - ${apiUrl} - + + eu.dnetlib.dhp.oa.dedup.UpdateOpenorgsJob + --apiUrl${apiUrl} + --delay5 + diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json new file mode 100644 index 000000000..5ca4a3dba --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json @@ -0,0 +1,14 @@ +[ + { + "paramName": "api", + "paramLongName": "apiUrl", + "paramDescription": "the url of the API", + "paramRequired": true + }, + { + "paramName": "d", + "paramLongName": "delay", + "paramDescription": "delay for the HTTP call in minutes", + "paramRequired": true + } +] \ No newline at end of file From 611ca511db42d8f07e3c21e7526a19d0755a57b3 Mon Sep 17 00:00:00 2001 From: miconis Date: Thu, 7 Oct 2021 15:39:55 +0200 Subject: [PATCH 165/166] set configuration property in openorgs duplicates wf --- .../dhp/oa/dedup/SparkWhitelistSimRels.java | 201 +-- .../dhp/oa/dedup/UpdateOpenorgsJob.java | 32 +- .../oa/dedup/openorgs/oozie_app/workflow.xml | 6 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 1266 +++++++++-------- 4 files changed, 761 insertions(+), 744 deletions(-) diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 
fa7d33570..7d91e47cc 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.oa.dedup; import java.io.IOException; @@ -5,10 +6,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.Dataset; @@ -21,7 +19,6 @@ import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.model.Block; import eu.dnetlib.dhp.schema.oaf.DataInfo; import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.utils.ISLookupClientFactory; @@ -31,121 +28,127 @@ import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.MapDocument; import eu.dnetlib.pace.util.MapDocumentUtil; import scala.Tuple2; -import scala.Tuple3; public class SparkWhitelistSimRels extends AbstractSparkAction { - private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); + private static final Logger log = LoggerFactory.getLogger(SparkCreateSimRels.class); - private static final String WHITELIST_SEPARATOR = "####"; + private static final String WHITELIST_SEPARATOR = "####"; - public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { - super(parser, spark); - } + public SparkWhitelistSimRels(ArgumentApplicationParser parser, SparkSession spark) { + super(parser, spark); + } - public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - parser.parseArgument(args); + public static void main(String[] args) throws Exception { + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + parser.parseArgument(args); - SparkConf conf = new SparkConf(); - new SparkWhitelistSimRels(parser, getSparkSession(conf)) - .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); - } + SparkConf conf = new SparkConf(); + new SparkWhitelistSimRels(parser, getSparkSession(conf)) + .run(ISLookupClientFactory.getLookUpService(parser.get("isLookUpUrl"))); + } - @Override - public void run(ISLookUpService isLookUpService) - throws DocumentException, IOException, ISLookUpException, SAXException { + @Override + public void run(ISLookUpService isLookUpService) + throws DocumentException, IOException, ISLookUpException, SAXException { - // read oozie parameters - final String graphBasePath = parser.get("graphBasePath"); - final String isLookUpUrl = parser.get("isLookUpUrl"); - final String actionSetId = parser.get("actionSetId"); - final String workingPath = parser.get("workingPath"); - final int numPartitions = Optional - .ofNullable(parser.get("numPartitions")) - .map(Integer::valueOf) - .orElse(NUM_PARTITIONS); - final String whiteListPath = 
parser.get("whiteListPath"); + // read oozie parameters + final String graphBasePath = parser.get("graphBasePath"); + final String isLookUpUrl = parser.get("isLookUpUrl"); + final String actionSetId = parser.get("actionSetId"); + final String workingPath = parser.get("workingPath"); + final int numPartitions = Optional + .ofNullable(parser.get("numPartitions")) + .map(Integer::valueOf) + .orElse(NUM_PARTITIONS); + final String whiteListPath = parser.get("whiteListPath"); - log.info("numPartitions: '{}'", numPartitions); - log.info("graphBasePath: '{}'", graphBasePath); - log.info("isLookUpUrl: '{}'", isLookUpUrl); - log.info("actionSetId: '{}'", actionSetId); - log.info("workingPath: '{}'", workingPath); - log.info("whiteListPath: '{}'", whiteListPath); + log.info("numPartitions: '{}'", numPartitions); + log.info("graphBasePath: '{}'", graphBasePath); + log.info("isLookUpUrl: '{}'", isLookUpUrl); + log.info("actionSetId: '{}'", actionSetId); + log.info("workingPath: '{}'", workingPath); + log.info("whiteListPath: '{}'", whiteListPath); - JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - //file format: source####target - Dataset> whiteListRels = spark.createDataset(sc - .textFile(whiteListPath) - //check if the line is in the correct format: id1####id2 - .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) - .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + // file format: source####target + Dataset> whiteListRels = spark + .createDataset( + sc + .textFile(whiteListPath) + // check if the line is in the correct format: id1####id2 + .filter(s -> s.contains(WHITELIST_SEPARATOR) && s.split(WHITELIST_SEPARATOR).length == 2) + .map(s -> new Tuple2<>(s.split(WHITELIST_SEPARATOR)[0], s.split(WHITELIST_SEPARATOR)[1])) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - // for each dedup configuration - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { + // for each dedup configuration + for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { - final String entity = dedupConf.getWf().getEntityType(); - final String subEntity = dedupConf.getWf().getSubEntityValue(); - log.info("Adding whitelist simrels for: '{}'", subEntity); + final String entity = dedupConf.getWf().getEntityType(); + final String subEntity = dedupConf.getWf().getSubEntityValue(); + log.info("Adding whitelist simrels for: '{}'", subEntity); - final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); + final String outputPath = DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity); - Dataset> entities = spark.createDataset(sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .mapToPair( - (PairFunction) s -> { - MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); - return new Tuple2<>(d.getIdentifier(), "present"); - }) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> entities = spark + .createDataset( + sc + .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .repartition(numPartitions) + .mapToPair( + (PairFunction) s -> { + MapDocument d = MapDocumentUtil.asMapDocumentWithJPath(dedupConf, s); + return new Tuple2<>(d.getIdentifier(), "present"); + 
}) + .rdd(), + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels1 = whiteListRels - .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels1 = whiteListRels + .joinWith(entities, whiteListRels.col("_1").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset> whiteListRels2 = whiteListRels1 - .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") - .map((MapFunction, Tuple2>, Tuple2>) Tuple2::_1, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + Dataset> whiteListRels2 = whiteListRels1 + .joinWith(entities, whiteListRels1.col("_2").equalTo(entities.col("_1")), "inner") + .map( + (MapFunction, Tuple2>, Tuple2>) Tuple2::_1, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); - Dataset whiteListSimRels = whiteListRels2 - .map((MapFunction, Relation>) - r -> createSimRel(r._1(), r._2(), entity), - Encoders.bean(Relation.class) - ); + Dataset whiteListSimRels = whiteListRels2 + .map( + (MapFunction, Relation>) r -> createSimRel(r._1(), r._2(), entity), + Encoders.bean(Relation.class)); - saveParquet(whiteListSimRels, outputPath, SaveMode.Append); - } - } + saveParquet(whiteListSimRels, outputPath, SaveMode.Append); + } + } - private Relation createSimRel(String source, String target, String entity) { - final Relation r = new Relation(); - r.setSource(source); - r.setTarget(target); - r.setSubRelType("dedupSimilarity"); - r.setRelClass("isSimilarTo"); - r.setDataInfo(new DataInfo()); + private Relation createSimRel(String source, String target, String entity) { + final Relation r = new Relation(); + r.setSource(source); + r.setTarget(target); + r.setSubRelType("dedupSimilarity"); + r.setRelClass("isSimilarTo"); + r.setDataInfo(new DataInfo()); - switch (entity) { - case "result": - r.setRelType("resultResult"); - break; - case "organization": - r.setRelType("organizationOrganization"); - break; - default: - throw new IllegalArgumentException("unmanaged entity type: " + entity); - } - return r; - } + switch (entity) { + case "result": + r.setRelType("resultResult"); + break; + case "organization": + r.setRelType("organizationOrganization"); + break; + default: + throw new IllegalArgumentException("unmanaged entity type: " + entity); + } + return r; + } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java index f23c54e2e..d094fb72b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/UpdateOpenorgsJob.java @@ -1,7 +1,8 @@ + package eu.dnetlib.dhp.oa.dedup; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import java.util.concurrent.TimeUnit; + import org.apache.commons.io.IOUtils; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -10,16 +11,21 @@ import org.apache.http.impl.client.HttpClients; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.TimeUnit; +import com.fasterxml.jackson.databind.ObjectMapper; + +import 
eu.dnetlib.dhp.application.ArgumentApplicationParser; public class UpdateOpenorgsJob { - private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); + private static final Logger log = LoggerFactory.getLogger(UpdateOpenorgsJob.class); public static void main(String[] args) throws Exception { - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils.toString(SparkCreateSimRels.class.getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); - parser.parseArgument(args); + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream("/eu/dnetlib/dhp/oa/dedup/updateOpenorgsJob_parameters.json"))); + parser.parseArgument(args); final String apiUrl = parser.get("apiUrl"); final int delay = Integer.parseInt(parser.get("delay")); @@ -28,17 +34,17 @@ public class UpdateOpenorgsJob { log.info("delay: '{}'", delay); APIResponse res = httpCall(apiUrl); - while(res!=null && res.getStatus().equals(ImportStatus.RUNNING)){ + while (res != null && res.getStatus().equals(ImportStatus.RUNNING)) { TimeUnit.MINUTES.sleep(delay); res = httpCall(apiUrl + "/status"); } - if (res==null) { + if (res == null) { log.error("Openorgs Update FAILED: No response"); throw new RuntimeException("Openorgs Update FAILED: No response"); } - if (res.getStatus()==null || !res.getStatus().equals(ImportStatus.SUCCESS)) { + if (res.getStatus() == null || !res.getStatus().equals(ImportStatus.SUCCESS)) { log.error("Openorgs Update FAILED: '{}' - '{}'", res.getStatus(), res.getMessage()); throw new RuntimeException(res.getMessage()); } @@ -107,9 +113,5 @@ class APIResponse { } enum ImportStatus { - SUCCESS, - FAILED, - RUNNING, - NOT_LAUNCHED, - NOT_YET_STARTED + SUCCESS, FAILED, RUNNING, NOT_LAUNCHED, NOT_YET_STARTED } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml index c7c6c9257..6947019e8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/openorgs/oozie_app/workflow.xml @@ -267,6 +267,12 @@ + + + oozie.launcher.mapreduce.user.classpath.first + true + + eu.dnetlib.dhp.oa.dedup.UpdateOpenorgsJob --apiUrl${apiUrl} --delay5 diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index fa03f93a6..549988767 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -48,634 +48,640 @@ import scala.Tuple2; @TestMethodOrder(MethodOrderer.OrderAnnotation.class) public class SparkDedupTest implements Serializable { - @Mock(serializable = true) - ISLookUpService isLookUpService; - - private static SparkSession spark; - private static JavaSparkContext jsc; - - private static String testGraphBasePath; - private static String testOutputBasePath; - private static String testDedupGraphBasePath; - private static final String testActionSetId = "test-orchestrator"; - private static String whitelistPath; - private static List whiteList; - - private static String WHITELIST_SEPARATOR = "####"; - - 
@BeforeAll - public static void cleanUp() throws IOException, URISyntaxException { - - testGraphBasePath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) - .toFile() - .getAbsolutePath(); - testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") - .toAbsolutePath() - .toString(); - - whitelistPath = Paths - .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) - .toFile() - .getAbsolutePath(); - whiteList = IOUtils.readLines(new FileReader(whitelistPath)); - - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - - final SparkConf conf = new SparkConf(); - conf.set("spark.sql.shuffle.partitions", "200"); - spark = SparkSession - .builder() - .appName(SparkDedupTest.class.getSimpleName()) - .master("local[*]") - .config(conf) - .getOrCreate(); - - jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - - } - - @BeforeEach - public void setUp() throws IOException, ISLookUpException { - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); - - lenient() - .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) - .thenReturn( - IOUtils - .toString( - SparkDedupTest.class - .getResourceAsStream( - "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); - } - - @Test - @Order(1) - void createSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50" - }); - - new SparkCreateSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, 
"software")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(336, sw_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - } - - @Test - @Order(2) - void whitelistSimRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkWhitelistSimRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, - "-asi", testActionSetId, - "-la", "lookupurl", - "-w", testOutputBasePath, - "-np", "50", - "-wl", whitelistPath - }); - - new SparkWhitelistSimRels(parser, spark).run(isLookUpService); - - long orgs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) - .count(); - - long pubs_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) - .count(); - - long ds_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) - .count(); - - long orp_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) - .count(); - - //entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(3082, orgs_simrel); - assertEquals(7036, pubs_simrel); - assertEquals(442, ds_simrel); - assertEquals(6750, orp_simrel); - - //entities simrels to be different from the number of previous step (new simrels in the whitelist) - Dataset sw_simrel = spark - .read() - .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); - - //check if the first relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])).count() > 0); - //check if the second relation in the whitelist exists - assertTrue(sw_simrel - .as(Encoders.bean(Relation.class)) - .toJavaRDD() - .filter(rel -> - rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])).count() > 0); - - assertEquals(338, sw_simrel.count()); - - } - - @Test - @Order(3) - void cutMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath, - "-cc", - "3" - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - 
.agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) - .groupBy("source") - .agg(count("target").alias("cnt")) - .select("source", "cnt") - .where("cnt > 3") - .count(); - - assertEquals(0, orgs_mergerel); - assertEquals(0, pubs_mergerel); - assertEquals(0, sw_mergerel); - assertEquals(0, ds_mergerel); - assertEquals(0, orp_mergerel); - - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); - FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); - FileUtils - .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); - } - - @Test - @Order(4) - void createMergeRelsTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateMergeRels.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); - - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateMergeRels(parser, spark).run(isLookUpService); - - long orgs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .count(); - long pubs_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .count(); - long sw_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .count(); - long ds_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .count(); - - long orp_mergerel = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .count(); - - assertEquals(1272, orgs_mergerel); - assertEquals(1438, pubs_mergerel); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(718, orp_mergerel); - - } - - @Test - 
@Order(5) - void createDedupRecordTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkCreateDedupRecord.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", - testGraphBasePath, - "-asi", - testActionSetId, - "-la", - "lookupurl", - "-w", - testOutputBasePath - }); - - new SparkCreateDedupRecord(parser, spark).run(isLookUpService); - - long orgs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") - .count(); - long pubs_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") - .count(); - long sw_deduprecord = jsc - .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") - .count(); - long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); - long orp_deduprecord = jsc - .textFile( - testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") - .count(); - - assertEquals(85, orgs_deduprecord); - assertEquals(65, pubs_deduprecord); - assertEquals(49, sw_deduprecord); - assertEquals(97, ds_deduprecord); - assertEquals(89, orp_deduprecord); - } - - @Test - @Order(6) - void updateEntityTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkUpdateEntity.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkUpdateEntity(parser, spark).run(isLookUpService); - - long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); - long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); - long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); - long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); - long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); - long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); - long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); - - long mergedOrgs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedPubs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedSw = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedDs = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") - .as(Encoders.bean(Relation.class)) - .where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - long mergedOrp = spark - .read() - .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") - .as(Encoders.bean(Relation.class)) - 
.where("relClass=='merges'") - .javaRDD() - .map(Relation::getTarget) - .distinct() - .count(); - - assertEquals(896, publications); - assertEquals(838, organizations); - assertEquals(100, projects); - assertEquals(100, datasource); - assertEquals(198, softwares); - assertEquals(389, dataset); - assertEquals(517, otherresearchproduct); - - long deletedOrgs = jsc - .textFile(testDedupGraphBasePath + "/organization") - .filter(this::isDeletedByInference) - .count(); - - long deletedPubs = jsc - .textFile(testDedupGraphBasePath + "/publication") - .filter(this::isDeletedByInference) - .count(); - - long deletedSw = jsc - .textFile(testDedupGraphBasePath + "/software") - .filter(this::isDeletedByInference) - .count(); - - long deletedDs = jsc - .textFile(testDedupGraphBasePath + "/dataset") - .filter(this::isDeletedByInference) - .count(); - - long deletedOrp = jsc - .textFile(testDedupGraphBasePath + "/otherresearchproduct") - .filter(this::isDeletedByInference) - .count(); - - assertEquals(mergedOrgs, deletedOrgs); - assertEquals(mergedPubs, deletedPubs); - assertEquals(mergedSw, deletedSw); - assertEquals(mergedDs, deletedDs); - assertEquals(mergedOrp, deletedOrp); - } - - @Test - @Order(7) - void propagateRelationTest() throws Exception { - - ArgumentApplicationParser parser = new ArgumentApplicationParser( - IOUtils - .toString( - SparkPropagateRelation.class - .getResourceAsStream( - "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); - parser - .parseArgument( - new String[]{ - "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath - }); - - new SparkPropagateRelation(parser, spark).run(isLookUpService); - - long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); - - assertEquals(4860, relations); - - // check deletedbyinference - final Dataset mergeRels = spark - .read() - .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) - .as(Encoders.bean(Relation.class)); - final JavaPairRDD mergedIds = mergeRels - .where("relClass == 'merges'") - .select(mergeRels.col("target")) - .distinct() - .toJavaRDD() - .mapToPair( - (PairFunction) r -> new Tuple2(r.getString(0), "d")); - - JavaRDD toCheck = jsc - .textFile(testDedupGraphBasePath + "/relation") - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()) - .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) - .join(mergedIds) - .map(t -> t._2()._1()); - - long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); - long updated = toCheck.count(); - - assertEquals(updated, deletedbyinference); - } - - @Test - @Order(8) - void testRelations() throws Exception { - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); - testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); - } - - private void testUniqueness(String path, int expected_total, int expected_unique) { - Dataset rel = spark - .read() - .textFile(getClass().getResource(path).getPath()) - .map( - (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), - Encoders.bean(Relation.class)); - - assertEquals(expected_total, rel.count()); - assertEquals(expected_unique, rel.distinct().count()); - } - - @AfterAll - public static void finalCleanUp() throws IOException { - FileUtils.deleteDirectory(new File(testOutputBasePath)); - FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); - } - - public boolean isDeletedByInference(String s) { - 
return s.contains("\"deletedbyinference\":true"); - } + @Mock(serializable = true) + ISLookUpService isLookUpService; + + private static SparkSession spark; + private static JavaSparkContext jsc; + + private static String testGraphBasePath; + private static String testOutputBasePath; + private static String testDedupGraphBasePath; + private static final String testActionSetId = "test-orchestrator"; + private static String whitelistPath; + private static List whiteList; + + private static String WHITELIST_SEPARATOR = "####"; + + @BeforeAll + public static void cleanUp() throws IOException, URISyntaxException { + + testGraphBasePath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/entities").toURI()) + .toFile() + .getAbsolutePath(); + testOutputBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + testDedupGraphBasePath = createTempDirectory(SparkDedupTest.class.getSimpleName() + "-") + .toAbsolutePath() + .toString(); + + whitelistPath = Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/whitelist.simrels.txt").toURI()) + .toFile() + .getAbsolutePath(); + whiteList = IOUtils.readLines(new FileReader(whitelistPath)); + + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + + final SparkConf conf = new SparkConf(); + conf.set("spark.sql.shuffle.partitions", "200"); + spark = SparkSession + .builder() + .appName(SparkDedupTest.class.getSimpleName()) + .master("local[*]") + .config(conf) + .getOrCreate(); + + jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + } + + @BeforeEach + public void setUp() throws IOException, ISLookUpException { + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains(testActionSetId))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/profiles/mock_orchestrator.xml"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("organization"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("publication"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("software"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("dataset"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json"))); + + lenient() + .when(isLookUpService.getResourceProfileByQuery(Mockito.contains("otherresearchproduct"))) + .thenReturn( + IOUtils + .toString( + SparkDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json"))); + } + + @Test + @Order(1) + void createSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", 
testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50" + }); + + new SparkCreateSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(336, sw_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + } + + @Test + @Order(2) + void whitelistSimRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkWhitelistSimRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/whitelistSimRels_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, + "-asi", testActionSetId, + "-la", "lookupurl", + "-w", testOutputBasePath, + "-np", "50", + "-wl", whitelistPath + }); + + new SparkWhitelistSimRels(parser, spark).run(isLookUpService); + + long orgs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) + .count(); + + long pubs_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "publication")) + .count(); + + long ds_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "dataset")) + .count(); + + long orp_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "otherresearchproduct")) + .count(); + + // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) + assertEquals(3082, orgs_simrel); + assertEquals(7036, pubs_simrel); + assertEquals(442, ds_simrel); + assertEquals(6750, orp_simrel); + + // entities simrels to be different from the number of previous step (new simrels in the whitelist) + Dataset sw_simrel = spark + .read() + .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "software")); + + // check if the first relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(0).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + // check if the second relation in the whitelist exists + assertTrue( + sw_simrel + .as(Encoders.bean(Relation.class)) + .toJavaRDD() + .filter( + rel -> rel.getSource().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[0]) + && rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) + .count() > 0); + + assertEquals(338, sw_simrel.count()); + + } + + @Test + @Order(3) + void cutMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + 
.getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-cc", + "3" + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .filter((FilterFunction) r -> r.getRelClass().equalsIgnoreCase("merges")) + .groupBy("source") + .agg(count("target").alias("cnt")) + .select("source", "cnt") + .where("cnt > 3") + .count(); + + assertEquals(0, orgs_mergerel); + assertEquals(0, pubs_mergerel); + assertEquals(0, sw_mergerel); + assertEquals(0, ds_mergerel); + assertEquals(0, orp_mergerel); + + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/software_mergerel")); + FileUtils.deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel")); + FileUtils + .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); + } + + @Test + @Order(4) + void createMergeRelsTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateMergeRels.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json"))); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + long pubs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .count(); 
+ long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + assertEquals(1272, orgs_mergerel); + assertEquals(1438, pubs_mergerel); + assertEquals(286, sw_mergerel); + assertEquals(472, ds_mergerel); + assertEquals(718, orp_mergerel); + + } + + @Test + @Order(5) + void createDedupRecordTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkCreateDedupRecord.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/createDedupRecord_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath + }); + + new SparkCreateDedupRecord(parser, spark).run(isLookUpService); + + long orgs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/organization_deduprecord") + .count(); + long pubs_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/publication_deduprecord") + .count(); + long sw_deduprecord = jsc + .textFile(testOutputBasePath + "/" + testActionSetId + "/software_deduprecord") + .count(); + long ds_deduprecord = jsc.textFile(testOutputBasePath + "/" + testActionSetId + "/dataset_deduprecord").count(); + long orp_deduprecord = jsc + .textFile( + testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_deduprecord") + .count(); + + assertEquals(85, orgs_deduprecord); + assertEquals(65, pubs_deduprecord); + assertEquals(49, sw_deduprecord); + assertEquals(97, ds_deduprecord); + assertEquals(89, orp_deduprecord); + } + + @Test + @Order(6) + void updateEntityTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkUpdateEntity.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/updateEntity_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkUpdateEntity(parser, spark).run(isLookUpService); + + long organizations = jsc.textFile(testDedupGraphBasePath + "/organization").count(); + long publications = jsc.textFile(testDedupGraphBasePath + "/publication").count(); + long projects = jsc.textFile(testDedupGraphBasePath + "/project").count(); + long datasource = jsc.textFile(testDedupGraphBasePath + "/datasource").count(); + long softwares = jsc.textFile(testDedupGraphBasePath + "/software").count(); + long dataset = jsc.textFile(testDedupGraphBasePath + "/dataset").count(); + long otherresearchproduct = jsc.textFile(testDedupGraphBasePath + "/otherresearchproduct").count(); + + long mergedOrgs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedPubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedSw = spark + .read() + .load(testOutputBasePath + "/" + 
testActionSetId + "/software_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedDs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + long mergedOrp = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .as(Encoders.bean(Relation.class)) + .where("relClass=='merges'") + .javaRDD() + .map(Relation::getTarget) + .distinct() + .count(); + + assertEquals(896, publications); + assertEquals(838, organizations); + assertEquals(100, projects); + assertEquals(100, datasource); + assertEquals(198, softwares); + assertEquals(389, dataset); + assertEquals(517, otherresearchproduct); + + long deletedOrgs = jsc + .textFile(testDedupGraphBasePath + "/organization") + .filter(this::isDeletedByInference) + .count(); + + long deletedPubs = jsc + .textFile(testDedupGraphBasePath + "/publication") + .filter(this::isDeletedByInference) + .count(); + + long deletedSw = jsc + .textFile(testDedupGraphBasePath + "/software") + .filter(this::isDeletedByInference) + .count(); + + long deletedDs = jsc + .textFile(testDedupGraphBasePath + "/dataset") + .filter(this::isDeletedByInference) + .count(); + + long deletedOrp = jsc + .textFile(testDedupGraphBasePath + "/otherresearchproduct") + .filter(this::isDeletedByInference) + .count(); + + assertEquals(mergedOrgs, deletedOrgs); + assertEquals(mergedPubs, deletedPubs); + assertEquals(mergedSw, deletedSw); + assertEquals(mergedDs, deletedDs); + assertEquals(mergedOrp, deletedOrp); + } + + @Test + @Order(7) + void propagateRelationTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + SparkPropagateRelation.class + .getResourceAsStream( + "/eu/dnetlib/dhp/oa/dedup/propagateRelation_parameters.json"))); + parser + .parseArgument( + new String[] { + "-i", testGraphBasePath, "-w", testOutputBasePath, "-o", testDedupGraphBasePath + }); + + new SparkPropagateRelation(parser, spark).run(isLookUpService); + + long relations = jsc.textFile(testDedupGraphBasePath + "/relation").count(); + + assertEquals(4860, relations); + + // check deletedbyinference + final Dataset mergeRels = spark + .read() + .load(DedupUtility.createMergeRelPath(testOutputBasePath, "*", "*")) + .as(Encoders.bean(Relation.class)); + final JavaPairRDD mergedIds = mergeRels + .where("relClass == 'merges'") + .select(mergeRels.col("target")) + .distinct() + .toJavaRDD() + .mapToPair( + (PairFunction) r -> new Tuple2(r.getString(0), "d")); + + JavaRDD toCheck = jsc + .textFile(testDedupGraphBasePath + "/relation") + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.source", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()) + .mapToPair(json -> new Tuple2<>(MapDocumentUtil.getJPathString("$.target", json), json)) + .join(mergedIds) + .map(t -> t._2()._1()); + + long deletedbyinference = toCheck.filter(this::isDeletedByInference).count(); + long updated = toCheck.count(); + + assertEquals(updated, deletedbyinference); + } + + @Test + @Order(8) + void testRelations() throws Exception { + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_1.json", 12, 10); + testUniqueness("/eu/dnetlib/dhp/dedup/test/relation_2.json", 10, 2); + } + + private void testUniqueness(String path, int 
expected_total, int expected_unique) { + Dataset rel = spark + .read() + .textFile(getClass().getResource(path).getPath()) + .map( + (MapFunction) s -> new ObjectMapper().readValue(s, Relation.class), + Encoders.bean(Relation.class)); + + assertEquals(expected_total, rel.count()); + assertEquals(expected_unique, rel.distinct().count()); + } + + @AfterAll + public static void finalCleanUp() throws IOException { + FileUtils.deleteDirectory(new File(testOutputBasePath)); + FileUtils.deleteDirectory(new File(testDedupGraphBasePath)); + } + + public boolean isDeletedByInference(String s) { + return s.contains("\"deletedbyinference\":true"); + } } From 8d3b60f4469f553acfad14e618414c930daebe05 Mon Sep 17 00:00:00 2001 From: Alessia Bardi Date: Thu, 7 Oct 2021 17:30:45 +0200 Subject: [PATCH 166/166] test for patching records for EOSC Future --- .../provision/IndexRecordTransformerTest.java | 20 ++ .../eosc-future/air-quality-copernicus.xml | 114 +++++++ .../eosc-future/b2share-plot-related-orp.xml | 288 ++++++++++++++++++ .../provision/eosc-future/b2share-plot-sw.xml | 112 +++++++ .../eosc-future/data-transfer-pilot.xml | 23 +- .../training-notebooks-seadatanet.xml | 11 +- 6 files changed, 551 insertions(+), 17 deletions(-) create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index 8daf318be..1c7dce3f2 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -88,6 +88,26 @@ public class IndexRecordTransformerTest { testRecordTransformation(record); } + @Test + public void testForEOSCFutureAirQualityCopernicus() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/air-quality-copernicus.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureB2SharePlotSw() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-sw.xml")); + testRecordTransformation(record); + } + + @Test + public void testForEOSCFutureB2SharePlotRelatedORP() throws IOException, TransformerException { + final String record = IOUtils.toString(getClass().getResourceAsStream("eosc-future/b2share-plot-related-orp.xml")); + testRecordTransformation(record); + } + + + private void testRecordTransformation(final String record) throws IOException, TransformerException { final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml")); final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl")); diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml 
b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml
new file mode 100644
index 000000000..43b256bbb
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/air-quality-copernicus.xml
@@ -0,0 +1,114 @@
+
+
+
+ r37b0ad08687::a8df7db30ae0e4e0b875a098df7b652f
+ 2021-10-07T01:56:56Z
+ under curation
+
+
+ + + + + Using CAMS European air quality analysis from Copernicus + Atmosphere Monitoring with RELIANCE services + + Simone Mantovani + 2021-10-07 + + + + This notebook shows how to discover and access the Copernicus Atmosphere Monitoring products available in the RELIANCE datacube resources. + The process is structured in 6 steps, including example of data analysis and visualization with the Python libraries installed in the Jupyter environment + + + EOSC Jupyter Notebook + + RELIANCE + + Copernicus + + Air quality + + + + Zenodo + + + + + + + + + + + + + + + + + + + oai:zenodo.org:5554786 + + oai:zenodo.org:5554786 + + 10.5281/zenodo.5554786 + + + + false + false + 0.9 + + + + + + + corda__h2020::8771f523c34e38902d4921037d545ef8 + + REsearch LIfecycle mAnagemeNt for Earth Science Communities and CopErnicus users in EOSC + 101017501 + RELIANCE + + + ec__________::EC::H2020 + ec__________::EC::H2020::RIA + + + + + + + + + + + + https://zenodo.org/record/5554786 + + + + + + +
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml
new file mode 100644
index 000000000..3c2c6440f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-related-orp.xml
@@ -0,0 +1,288 @@
+
+
+ doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127
+ 2019-09-19T07:43:31+0000
+ 2019-09-19T07:43:31+0000
+
+ + + + + + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + 6a93c069-a167-44cb-bfe8-74c275637347 + 50|r3730f562f9e::9b434fedc00d568b8e00611a7fa19f41 + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + ada23067-496a-494f-bd82-6ffe3cf4f0fb + 50|r3730f562f9e::b9cd774e8126b6902d56f9a4aa03e1dc + f3bd1041-422c-439d-8e68-c1d0711d130d + 50|r3730f562f9e::b847821a0ca5365b0d971dd89dea6bf1 + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + HCG16 L-band VLA C+D array final data + + + Jones, Michael G. + 2019-01-01 + These are the reduced final data associated with the paper Jones et al. 2019 submitted + to Astronomy & Astrophysics. They are used by a mybinder (https://gke.mybinder.org/) + executable environment to generate the final plots of that paper. The link for this environment + is https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. The raw VLA D and C array data of HCG 16 + were collected by the Very Large Array (http://www.vla.nrao.edu/) in 1989 and 1999, under PI + projects of Barbara Williams. The project numbers are AW234 and AW500 respectively. The file + also includes a grz colour image and r-band image from DECaLS DR8 + (http://legacysurvey.org/decamls/), a GBT HI spectrum published in Borthakur et al. 2010 (ApJ + 710, 385), an HI data cube from HIPASS (https://www.atnf.csiro.au/research/multibeam/release/), + and a source mask (and associated parameters file) for the HIPASS cube generated using SoFiA + (https://github.com/SoFiA-Admin/SoFiA-2). + + 3.5.2.1.1 → Observational astronomy → + Radio astronomy + + HI + + VLA + + HCG16 + + Various + + + 2019-01-01 + + https://b2share.eudat.eu + + + + true + false + 0.8 + dedup-similarity-result-decisiontree-v2 + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + Plot scripts for HCG-16 Project + + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + https://b2share.eudat.eu + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + 2019-01-01 + HCG16 L-band VLA C+D array final data + + + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + 2019-01-01 + HCG16 L-band VLA C+D + array final data + + + B2SHARE + + + + + + 2019-01-01 + + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + https://dx.doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + 2019-01-01 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + https://dx.doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + + + + + + + 2019-01-01 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + https://dx.doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + http://dx.doi.org/https://doi.org/10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + 2019-01-01 + + + https://doi.org10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + 
http://dx.doi.org/https://doi.org/10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + + + + + + +
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml
new file mode 100644
index 000000000..5f44f6b1f
--- /dev/null
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/b2share-plot-sw.xml
@@ -0,0 +1,112 @@
+
+
+ userclaim___::ee29372a239b79db3ac4c5debe44d6e6
+ 2021-10-07T12:42:54Z
+
+
+ + + + + Plot scripts for HCG-16 Project + + Jones, Michael G. + Jones, Michael G. + 2021-09-30 + + + These are the notebooks to general the final data plots of the paper Jones et al. 2019 + submitted to Astronomy & Astrophysics. They can be used in a notebooks environment (like + https://notebooks.egi.eu/) with the proper libraries installed. A mybinder + (https://mybinder.org/) + ready version can be started from https://mybinder.org/v2/gh/AMIGA-IAA/hcg-16/master. Data to + generate plots is also available from B2SHARE: + https://b2share.eudat.eu/records/a69a7b2dcc22449e8734552dde4d3906 + + + EOSC Jupyter Notebook + + + B2SHARE + + + + + + + + + + + + + + + + + + + userclaim___::ee29372a239b79db3ac4c5debe44d6e6 + + 10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + false + false + 0.9 + + + + + + doi_dedup___::44fd8a9b5b79adb0783ac245b21e3127 + HCG16 L-band VLA C+D array final data + 2019-01-01 + https://b2share.eudat.eu + 10.23728/b2share.ebcd2972c5fb44199f8b3fdf9f6413c6 + + 10.23728/b2share.a69a7b2dcc22449e8734552dde4d3906 + + 10.23728/b2share.7c8655b6f25348358b4e6fece7ab6016 + + + + + + + + + + + + 2021-09-30 + + http://dx.doi.org/10.23728/b2share.adf6e2e942b04561a8640c449b48c14a + + + + + + +
+
\ No newline at end of file
diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml
index 23dd6c6ed..6d2ac7630 100644
--- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml
+++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/data-transfer-pilot.xml
@@ -1,26 +1,25 @@
- r37b0ad08687::dec0d8520e726f2adda9a51280ac7299 - 2021-09-22T08:53:16Z - under curation - + doi_dedup___::ab57f086011a9ae23d1165211dc6e04b + 2020-11-03T05:39:50+0000 + 2020-11-03T05:39:50+0000
EGI-Foundation/data-transfer-pilot: Include libraries in environment.yml - Giuseppe La Rocca - Enol Fernández - Andrea Manzi - + Giuseppe La Rocca + Enol Fernández + Andrea Manzi + 2020-11-03 This notebook is used to demonstrate how a scientist from one of the PaNOSC RIs can use the resources provided by EGI to perform analysis on the data sets obtained during an expirement. EOSC Jupyter Notebook - + 2020-11-03 Zenodo @@ -43,8 +42,8 @@ oai:zenodo.org:4218562 oai:zenodo.org:4218562 - 10.5281/zenodo.4218562 - + 10.5281/zenodo.4195418 + 10.5281/zenodo.4218562 false false @@ -59,7 +58,7 @@ - + 2020-11-03 https://zenodo.org/record/4218562 diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml index 9995b902f..9ab9b9861 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/eosc-future/training-notebooks-seadatanet.xml @@ -1,10 +1,8 @@
- r37b0ad08687::eb430fb7438e1533ba95d6aa50a477eb + doi_dedup___::8539a8de8996e01350f0de8ca4899b7f 2021-09-22T08:53:13Z - under curation -
EGI-Foundation/training-notebooks-seadatanet: Version 0.4 Enol Fernández - + 2019-12-04 - A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/qVk6JWQ (run at EGI Notebooks service for easy access to data).This release updates the PID for the data. + A sample notebook using SeaDataNet data to plot a map that shows surface temperature of Black Sea, Arctic Sea and Baltic Sea. The data is available at EGI DataHub with PID http://hdl.handle.net/21.T15999/3Byz9Cw (run at EGI Notebooks service for easy access to data). This release uses the correct path of the data share from the EGI DataHub. EOSC Jupyter Notebook @@ -43,6 +41,9 @@ oai:zenodo.org:3561323 10.5281/zenodo.3561323 + 10.5281/zenodo.3443996 + 10.5281/zenodo.3475539 + 10.5281/zenodo.3475785 false